7 files changed, 1683 insertions, 341 deletions
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala
index 7bbe1e2a49..086946e4e3 100644
--- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala
+++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala
@@ -3,17 +3,22 @@ package backend.jvm
 package analysis
 
 import scala.annotation.switch
-import scala.collection.{mutable, immutable}
+import scala.collection.mutable
 import scala.tools.asm.Opcodes
 import scala.tools.asm.tree._
 import scala.tools.asm.tree.analysis.{Analyzer, Value, Frame, Interpreter}
 import opt.BytecodeUtils._
+import AliasSet.SmallBitSet
 
-object AliasingFrame {
-  private var _idCounter: Long = 0l
-  private def nextId = { _idCounter += 1; _idCounter }
-}
-
+/**
+ * A subclass of Frame that tracks aliasing of values stored in local variables and on the stack.
+ *
+ * Note: an analysis tracking aliases is roughly 5x slower than a usual analysis (assuming a simple
+ * value domain with a fast merge function). For example, nullness analysis is roughly 5x slower
+ * than a BasicValue analysis.
+ *
+ * See the doc of package object `analysis` for some notes on the performance of alias analysis.
+ */
 class AliasingFrame[V <: Value](nLocals: Int, nStack: Int) extends Frame[V](nLocals, nStack) {
   import Opcodes._
 
@@ -23,63 +28,80 @@ class AliasingFrame[V <: Value](nLocals: Int, nStack: Int) extends Frame[V](nLoc
     init(src)
   }
 
-  /**
-   * For each slot (entry in the `values` array of the frame), an id that uniquely represents
-   * the object stored in it. If two values have the same id, they are aliases of the same
-   * object.
-   */
-  private val aliasIds: Array[Long] = Array.fill(nLocals + nStack)(AliasingFrame.nextId)
+  override def toString: String = super.toString + " - " + aliases.toList.collect { case s if s != null && s.size > 1 => s.toString }.distinct.mkString(",") // only sets with more than one element are shown
 
   /**
-   * The object alias id of for a value index.
-   */
-  def aliasId(entry: Int) = aliasIds(entry)
-
-  /**
-   * Returns the indices of the values array which are aliases of the object `id`.
+   * For every value the set of values that are aliases of it.
+   *
+   * Invariants:
+   *  - If `aliases(i) == null` then i has no aliases. This is equivalent to having
+   *    `aliases(i) == SingletonSet(i)`.
+   *  - If `aliases(i) != null` then `aliases(i) contains i`.
+   *  - If `aliases(i) contains j` then `aliases(i) eq aliases(j)`, i.e., they are references to the
+   *    same (mutable) AliasSet.
    */
-  def valuesWithAliasId(id: Long): Set[Int] = immutable.BitSet.empty ++ aliasIds.indices.iterator.filter(i => aliasId(i) == id)
+  val aliases: Array[AliasSet] = new Array[AliasSet](getLocals + getMaxStackSize)
 
   /**
    * The set of aliased values for a given entry in the `values` array.
    */
-  def aliasesOf(entry: Int): Set[Int] = valuesWithAliasId(aliasIds(entry))
+  def aliasesOf(entry: Int): AliasSet = {
+    val existing = aliases(entry)
+    if (existing == null) { // no set stored yet: materialize the implicit singleton {entry}
+      val singleton = new AliasSet(new AliasSet.SmallBitSet(entry, -1, -1, -1), 1)
+      aliases(entry) = singleton
+      singleton
+    } else existing
+  }
 
   /**
-   * Define a new alias. For example, given
-   *   var a = this       // this, a have the same aliasId
-   * then an assignment
+   * Define a new alias. For example, an assignment
    *   b = a
-   * will set the same the aliasId for `b`.
+   * adds b to the set of aliases of a.
    */
   private def newAlias(assignee: Int, source: Int): Unit = {
-    aliasIds(assignee) = aliasIds(source)
+    removeAlias(assignee)
+    val sourceAliases = aliasesOf(source)
+    sourceAliases += assignee
+    aliases(assignee) = sourceAliases
   }
 
   /**
-   * An assignment
+   * Remove an alias. For example, an assignment
    *   a = someUnknownValue()
-   * sets a fresh alias id for `a`.
-   * A stack value is also removed from its alias set when being consumed.
+   * removes a from its former alias set.
+   * As another example, stack values are removed from their alias sets when being consumed.
    */
   private def removeAlias(assignee: Int): Unit = {
-    aliasIds(assignee) = AliasingFrame.nextId
+    if (aliases(assignee) != null) {
+      aliases(assignee) -= assignee
+      aliases(assignee) = null
+    }
+  }
+
+  /**
+   * Make `set` the alias set of `assignee`, first removing `assignee` from its former set (if any).
+   */
+  private def setAliasSet(assignee: Int, set: AliasSet): Unit = {
+    val previous = aliases(assignee)
+    // the former set must no longer list `assignee` once the slot points elsewhere
+    if (previous != null) previous -= assignee
+    aliases(assignee) = set
+  }
 
   override def execute(insn: AbstractInsnNode, interpreter: Interpreter[V]): Unit = {
-    // Make the extendsion methods easier to use (otherwise we have to repeat `this`.stackTop)
+    // Make the extension methods easier to use (otherwise we have to repeat `this`.stackTop)
     def stackTop: Int = this.stackTop
     def peekStack(n: Int): V = this.peekStack(n)
 
-    // the val pattern `val (p, c) = f` still allocates a tuple (https://github.com/scala-opt/scala/issues/28)
-    val prodCons = InstructionStackEffect(insn, this) // needs to be called before super.execute, see its doc
-    val consumed = prodCons._1
-    val produced = prodCons._2
+    val prodCons = InstructionStackEffect.forAsmAnalysis(insn, this) // needs to be called before super.execute, see its doc
+    val consumed = InstructionStackEffect.cons(prodCons)
+    val produced = InstructionStackEffect.prod(prodCons)
 
     super.execute(insn, interpreter)
 
     (insn.getOpcode: @switch) match {
-      case ALOAD =>
+      case ILOAD | LLOAD | FLOAD | DLOAD | ALOAD =>
         newAlias(assignee = stackTop, source = insn.asInstanceOf[VarInsnNode].`var`)
 
       case DUP =>
@@ -166,31 +188,54 @@ class AliasingFrame[V <: Value](nLocals: Int, nStack: Int) extends Frame[V](nLoc
         }
 
       case SWAP =>
+        // could be written more elegantly with higher-order combinators, but thinking of performance
         val top = stackTop
-        val idTop = aliasIds(top)
-        aliasIds(top)     = aliasIds(top - 1)
-        aliasIds(top - 1) = idTop
 
-      case opcode =>
-        if (opcode == ASTORE) {
-          // Not a separate case because we need to remove the consumed stack value from alias sets after.
-          val stackTopBefore = stackTop - produced + consumed
-          val local = insn.asInstanceOf[VarInsnNode].`var`
-          newAlias(assignee = local, source = stackTopBefore)
-          // if the value written is size 2, it overwrites the subsequent slot, which is then no
-          // longer an alias of anything. see the corresponding case in `Frame.execute`.
-          if (getLocal(local).getSize == 2)
-            removeAlias(local + 1)
-
-          // if the value at the preceding index is size 2, it is no longer valid, so we remove its
-          // aliasing. see corresponding case in `Frame.execute`
-          if (local > 0) {
-            val precedingValue = getLocal(local - 1)
-            if (precedingValue != null && precedingValue.getSize == 2)
-              removeAlias(local - 1)
+        def moveNextToTop(): Unit = {
+          val nextAliases = aliases(top - 1)
+          aliases(top) = nextAliases
+          nextAliases -= (top - 1)
+          nextAliases += top
+        }
+
+        if (aliases(top) != null) {
+          val topAliases = aliases(top)
+          if (aliases(top - 1) != null) moveNextToTop()
+          else aliases(top) = null
+          // move top to next
+          aliases(top - 1) = topAliases
+          topAliases -= top
+          topAliases += (top - 1)
+        } else {
+          if (aliases(top - 1) != null) {
+            moveNextToTop()
+            aliases(top - 1) = null
           }
         }
 
+      case opcode =>
+        (opcode: @switch) match {
+          case ISTORE | LSTORE | FSTORE | DSTORE | ASTORE =>
+            // not a separate case: we re-use the code below that removes the consumed stack value from alias sets
+            val stackTopBefore = stackTop - produced + consumed
+            val local = insn.asInstanceOf[VarInsnNode].`var`
+            newAlias(assignee = local, source = stackTopBefore)
+            // if the value written is size 2, it overwrites the subsequent slot, which is then no
+            // longer an alias of anything. see the corresponding case in `Frame.execute`.
+            if (getLocal(local).getSize == 2)
+              removeAlias(local + 1)
+
+            // if the value at the preceding index is size 2, it is no longer valid, so we remove its
+            // aliasing. see corresponding case in `Frame.execute`
+            if (local > 0) {
+              val precedingValue = getLocal(local - 1)
+              if (precedingValue != null && precedingValue.getSize == 2)
+                removeAlias(local - 1)
+            }
+
+          case _ =>
+        }
+
         // Remove consumed stack values from aliasing sets.
         // Example: iadd
         //  - before: local1, local2, stack1, consumed1, consumed2
@@ -198,10 +243,22 @@ class AliasingFrame[V <: Value](nLocals: Int, nStack: Int) extends Frame[V](nLoc
         val firstConsumed = stackTop - produced + 1                 // firstConsumed = 3
         for (i <- 0 until consumed)
           removeAlias(firstConsumed + i)                            // remove aliases for 3 and 4
+    }
+  }
 
-        // We don't need to set the aliases ids for the produced values: the aliasIds array already
-        // contains fresh ids for non-used stack values (ensured by removeAlias).
+  /**
+   * When entering an exception handler, all values are dropped from the stack (and the exception
+   * value is pushed). The ASM analyzer invokes `firstHandlerInstructionFrame.clearStack()`. To
+   * ensure consistent aliasing sets, we need to remove the dropped values from aliasing sets.
+   */
+  override def clearStack(): Unit = {
+    var i = getLocals
+    val end = i + getStackSize
+    while (i < end) {
+      removeAlias(i)
+      i += 1
     }
+    super.clearStack()
   }
 
   /**
@@ -217,30 +274,131 @@ class AliasingFrame[V <: Value](nLocals: Int, nStack: Int) extends Frame[V](nLoc
    *   x = a
    *   y = b     // (x, a) and (y, b)
    * }
-   * [...]       // (x, a)
+   * [...]       // (x, a) -- merge of ((x, y, a)) and ((x, a), (y, b))
    */
   override def merge(other: Frame[_ <: V], interpreter: Interpreter[V]): Boolean = {
+    // merge is the main performance hot spot of a data flow analysis.
+
+    // in nullness analysis, super.merge (which actually merges the nullness values) takes 20% of
+    // the overall analysis time.
     val valuesChanged = super.merge(other, interpreter)
+
+    // in nullness analysis, merging the alias sets takes ~55% of the analysis time. therefore, this
+    // code has been heavily optimized. most of the time is spent in the `hasNext` method of the
+    // andNotIterator, see its comment.
+
     var aliasesChanged = false
     val aliasingOther = other.asInstanceOf[AliasingFrame[_]]
-    for (i <- aliasIds.indices) {
-      val thisAliases = aliasesOf(i)
-      val thisNotOther = thisAliases diff (thisAliases intersect aliasingOther.aliasesOf(i))
-      if (thisNotOther.nonEmpty) {
-        aliasesChanged = true
-        thisNotOther foreach removeAlias
+
+    val numValues = getLocals + getStackSize
+    // assume (a, b) are aliases both in this frame, and the other frame. when merging the alias set
+    // for a, we already see that a and b will be aliases in the final result. so we can skip over
+    // merging the alias set for b. in this case, while merging the sets for a, knownOk(b) will be
+    // set to `true`.
+    val knownOk = new Array[Boolean](numValues)
+    var i = 0
+    while (i < numValues) {
+      if (!knownOk(i)) {
+        val thisAliases = this.aliases(i)
+        val otherAliases = aliasingOther.aliases(i)
+        if (thisAliases != null) {
+          if (otherAliases == null) {
+            if (thisAliases.size > 1) {
+              aliasesChanged = true
+              removeAlias(i)
+            }
+          } else {
+            // The iterator yields elements that are in `thisAliases` but not in `otherAliases`.
+            // As a side-effect, for every index `i` that is in both alias sets, the iterator sets
+            // `knownOk(i) = true`: the alias sets for these values don't need to be merged again.
+            val thisNotOtherIt = AliasSet.andNotIterator(thisAliases, otherAliases, knownOk)
+            if (thisNotOtherIt.hasNext) {
+              aliasesChanged = true
+              val newSet = AliasSet.empty
+              while (thisNotOtherIt.hasNext) {
+                val next = thisNotOtherIt.next()
+                newSet += next
+                setAliasSet(next, newSet)
+              }
+            }
+          }
+        }
       }
+      i += 1
     }
+
     valuesChanged || aliasesChanged
   }
 
+  private def min(s: SmallBitSet) = {
+    // Smallest element of a SmallBitSet. `a` and `b` are compared unconditionally,
+    // `c` and `d` only take part when they are in use (i.e. not -1).
+    val ab = if (s.b < s.a) s.b else s.a
+    val abc = if (s.c != -1 && s.c < ab) s.c else ab
+    if (s.d != -1 && s.d < abc) s.d else abc
+  }
+
   override def init(src: Frame[_ <: V]): Frame[V] = {
-    super.init(src)
-    compat.Platform.arraycopy(src.asInstanceOf[AliasingFrame[_]].aliasIds, 0, aliasIds, 0, aliasIds.length)
+    super.init(src) // very quick (just an arraycopy)
+    System.arraycopy(src.asInstanceOf[AliasingFrame[_]].aliases, 0, aliases, 0, aliases.length) // also quick
+
+    val newSets = mutable.HashMap.empty[AliasSet, AliasSet]
+
+    // the rest of this method (cloning alias sets) is the second performance hotspot (next to
+    // AliasingFrame.merge). for nullness, it takes ~20% of the analysis time.
+    // the difficulty here is that we have to clone the alias sets correctly. if two values a, b are
+    // aliases, then aliases(a) eq aliases(b). we need to make sure to use the same clone for the
+    // two values.
+
+    var i = 0
+    while (i < aliases.length) {
+      val set = aliases(i)
+      if (set != null) {
+        // size cannot be 0 - alias sets are always at least singletons.
+        // for sets of size 1-4, don't use the `newSets` map - lookup / update is slow
+        if (set.size == 1) {
+          aliases(i) = null
+        } else if (set.size <= 4) {
+          val small = set.set.asInstanceOf[AliasSet.SmallBitSet]
+          val firstOfSet = i == min(small)
+          if (firstOfSet) {
+            val newSet = set.clone()
+            aliases(small.a) = newSet
+            aliases(small.b) = newSet
+            if (small.c != -1) aliases(small.c) = newSet
+            if (small.d != -1) aliases(small.d) = newSet
+          }
+        } else {
+          // the actual hot spot is the hash map operations here: this is where almost all of the 20%
+          // mentioned above is spent.
+          // i also benchmarked an alternative implementation: keep an array of booleans for indexes
+          // that already contain the cloned set. iterate through all elements of the cloned set and
+          // assign the cloned set. this approach is 50% slower than using a hash map.
+          if (newSets contains set) aliases(i) = newSets(set)
+          else {
+            val newSet = set.clone()
+            newSets(set) = newSet
+            aliases(i) = newSet
+          }
+        }
+      }
+      i += 1
+    }
     this
   }
 }
 
+object AliasingFrame {
+//  Timers, presumably for ad-hoc profiling. Usage: val start1 = AliasingFrame.timer1.start()
+//                                                  AliasingFrame.timer1.stop(start1)
+  import scala.reflect.internal.util.Statistics._
+  val timer1 = newTimer("t1", "jvm")
+  val timer2 = newTimer("t2", "jvm")
+  val timer3 = newTimer("t3", "jvm")
+  val timers = List(timer1, timer2, timer3)
+  def reset(): Unit = for (t <- timers) { t.nanos = 0; t.timings = 0 } // zeroes `nanos` and `timings` of all three timers
+}
+
 /**
  * An analyzer that uses AliasingFrames instead of bare Frames. This can be used when an analysis
  * needs to track aliases, but doesn't require a more specific Frame subclass.
@@ -249,3 +407,269 @@ class AliasingAnalyzer[V <: Value](interpreter: Interpreter[V]) extends Analyzer
   override def newFrame(nLocals: Int, nStack: Int): AliasingFrame[V] = new AliasingFrame(nLocals, nStack)
   override def newFrame(src: Frame[_ <: V]): AliasingFrame[V] = new AliasingFrame(src)
 }
+
+/**
+ * An iterator over Int that re-declares `hasNext` and `next()` (required to prevent boxing the result of next).
+ */
+abstract class IntIterator extends Iterator[Int] {
+  def hasNext: Boolean
+  def next(): Int
+}
+
+/**
+ * An efficient mutable bit set. Up to four elements are kept in a SmallBitSet; `+=` switches to an
+ * Array[Long] when a fifth element is added, and `-=` switches back when the size drops to four.
+ * @param set  Either a SmallBitSet or an Array[Long]
+ * @param size The size of the set, useful for performance of certain operations
+ */
+class AliasSet(var set: Object /*SmallBitSet | Array[Long]*/, var size: Int) {
+  import AliasSet._
+
+  override def toString: String = iterator.toSet.mkString("<", ",", ">") // elements separated by "," between "<" and ">"
+
+  /**
+   * An iterator for the elements of this bit set. Note that only one iterator can be used at a
+   * time. Also make sure not to change the underlying AliasSet during iteration.
+   */
+  def iterator: IntIterator = andNotIterator(this, empty, null)
+
+  def +=(value: Int): Unit = this.set match { // adds `value`; nothing changes if it is already an element
+    case s: SmallBitSet => (size: @switch) match {
+      case 0 =>                                                     s.a = value; size = 1
+      case 1 => if (value != s.a)                                 { s.b = value; size = 2 }
+      case 2 => if (value != s.a && value != s.b)                 { s.c = value; size = 3 }
+      case 3 => if (value != s.a && value != s.b && value != s.c) { s.d = value; size = 4 }
+      case 4 => // a fifth distinct element does not fit: switch to the Array[Long] representation
+        if (value != s.a && value != s.b && value != s.c && value != s.d) {
+          this.set = bsEmpty
+          this.size = 0 // bsAdd increments the size again for every element added below
+          bsAdd(this, s.a)
+          bsAdd(this, s.b)
+          bsAdd(this, s.c)
+          bsAdd(this, s.d)
+          bsAdd(this, value)
+        }
+    }
+    case bits: Array[Long] =>
+      bsAdd(this, value)
+  }
+
+  def -=(value: Int): Unit = this.set match { // removes `value`; nothing changes if it is not an element
+    case s: SmallBitSet => (size: @switch) match {
+      case 0 => // empty set: nothing to remove
+      case 1 =>
+        if      (value == s.a) { s.a = -1; size = 0 }
+      case 2 => // from here on, later slots are shifted down so the used slots stay contiguous (-1 = unused)
+        if      (value == s.a) { s.a = s.b; s.b = -1; size = 1 }
+        else if (value == s.b) {            s.b = -1; size = 1 }
+      case 3 =>
+        if      (value == s.a) { s.a = s.b; s.b = s.c; s.c = -1; size = 2 }
+        else if (value == s.b) {            s.b = s.c; s.c = -1; size = 2 }
+        else if (value == s.c) {                       s.c = -1; size = 2 }
+      case 4 =>
+        if      (value == s.a) { s.a = s.b; s.b = s.c; s.c = s.d; s.d = -1; size = 3 }
+        else if (value == s.b) {            s.b = s.c; s.c = s.d; s.d = -1; size = 3 }
+        else if (value == s.c) {                       s.c = s.d; s.d = -1; size = 3 }
+        else if (value == s.d) {                                  s.d = -1; size = 3 }
+    }
+    case bits: Array[Long] =>
+      bsRemove(this, value)
+      if (this.size == 4) // down to four elements: switch back to the SmallBitSet representation
+        this.set = bsToSmall(this.set.asInstanceOf[Array[Long]])
+  }
+
+  override def clone(): AliasSet = { // the backing SmallBitSet / array is copied, not shared
+    val resSet = this.set match {
+      case s: SmallBitSet => new SmallBitSet(s.a, s.b, s.c, s.d)
+      case bits: Array[Long] => bits.clone()
+    }
+    new AliasSet(resSet, this.size)
+  }
+}
+
+object AliasSet {
+  def empty = new AliasSet(set = new SmallBitSet(a = -1, b = -1, c = -1, d = -1), size = 0) // a fresh, independent set on every call
+
+  final class SmallBitSet(var a: Int, var b: Int, var c: Int, var d: Int) {
+    override def toString = "(" + a + ", " + b + ", " + c + ", " + d + ")" // prints all four slots
+  }
+
+  def bsEmpty: Array[Long] = Array(0L) // a fresh single zeroed word (room for bits 0..63) on every call
+
+  private def bsEnsureCapacity(set: Array[Long], index: Int): Array[Long] = {
+    val oldLength = set.length
+    if (index >= oldLength) {
+      var grown = oldLength
+      while (grown <= index) grown *= 2 // keep doubling until `index` is addressable
+      val widened = new Array[Long](grown)
+      System.arraycopy(set, 0, widened, 0, oldLength)
+      widened
+    } else set
+  }
+
+  def bsAdd(set: AliasSet, bit: Int): Unit = { // adds `bit` to a set that is backed by an Array[Long]
+    val bits = set.set.asInstanceOf[Array[Long]]
+    val index = bit >> 6 // index of the 64-bit word holding `bit`
+    val resSet = bsEnsureCapacity(bits, index)
+    val before = resSet(index)
+    val result = before | (1L << bit) // a Long shift only uses the low 6 bits of `bit`
+    if (result != before) { // only touch the set and its size if the bit was not set before
+      resSet(index) = result
+      set.set = resSet
+      set.size += 1
+    }
+  }
+
+  def bsRemove(set: AliasSet, bit: Int): Unit = { // removes `bit` from a set that is backed by an Array[Long]
+    val bits = set.set.asInstanceOf[Array[Long]]
+    val index = bit >> 6 // index of the 64-bit word holding `bit`
+    if (index < bits.length) { // bits beyond the end of the array are not in the set
+      val before = bits(index)
+      val result = before & ~(1L << bit)
+      if (result != before) { // only adjust the size if the bit was actually set
+        bits(index) = result
+        set.size -= 1
+      }
+    }
+  }
+
+  def bsContains(set: Array[Long], bit: Int): Boolean = {
+    val word = bit >> 6 // 64 bits per Long
+    if (bit < 0 || word >= set.length) false else ((set(word) >>> bit) & 1L) == 1L
+  }
+
+//  var sizesHist: Array[Int] = new Array[Int](1000)
+
+  /**
+   * Convert a bit array to a SmallBitSet. Requires the bit array to contain exactly four bits: the scan stops at the fourth set bit, and `null` is returned if fewer than four bits are set.
+   */
+  def bsToSmall(bits: Array[Long]): SmallBitSet = {
+    var a = -1
+    var b = -1
+    var c = -1
+    var i = 0
+    val end = bits.length * 64
+    while (i < end) {
+      if (bsContains(bits, i)) {
+        if (a == -1) a = i
+        else if (b == -1) b = i
+        else if (c == -1) c = i
+        else return new SmallBitSet(a, b, c, i) // fourth set bit found; bits are visited in ascending order
+      }
+      i += 1
+    }
+    null // fewer than four bits were set
+  }
+
+  /**
+   * An iterator that yields the elements that are in one bit set and not in another (&~): the elements of `setA` that are not in `setB`. If `thisAndOther` is non-null, `thisAndOther(x)` is set to true for every `x` found in both sets.
+   */
+  private class AndNotIt(setA: AliasSet, setB: AliasSet, thisAndOther: Array[Boolean]) extends IntIterator {
+    // values in the first bit set
+    private var a, b, c, d = -1
+    private var xs: Array[Long] = null
+
+    // values in the second bit set
+    private var notA, notB, notC, notD = -1
+    private var notXs: Array[Long] = null
+
+    // holds the next value of `a`, `b`, `c` or `d` that should be returned. assigned in hasNext
+    private var abcdNext = -1
+
+    // counts through elements in the `xs` bit set
+    private var i = 0
+    // true if the current value of `i` should be returned by this iterator
+    private var iValid = false
+
+    setA.set match {
+      case s: SmallBitSet => a = s.a; b = s.b; c = s.c; d = s.d
+      case bits: Array[Long] => xs = bits
+    }
+
+    setB.set match {
+      case s: SmallBitSet => notA = s.a; notB = s.b; notC = s.c; notD = s.d
+      case bits: Array[Long] => notXs = bits
+    }
+
+    // for each value that exists both in this AND (&) the other bit set, `thisAndOther` is set to true.
+    // hacky side-effect, used for performance of AliasingFrame.merge.
+    private def setThisAndOther(x: Int) = if (thisAndOther != null) thisAndOther(x) = true
+
+    private def checkABCD(x: Int, num: Int): Boolean = { // examines slot `num` (1 = a .. 4 = d) holding `x`; true iff `x` is to be returned next
+      // assert(x == a && num == 1 || x == b && num == 2 || ...)
+      x != -1 && {
+        val otherHasX = x == notA || x == notB || x == notC || x == notD || (notXs != null && bsContains(notXs, x))
+        if (otherHasX) setThisAndOther(x)
+        else abcdNext = x
+        (num: @switch) match { // clear the examined slot so that it is visited only once
+          case 1 => a = -1
+          case 2 => b = -1
+          case 3 => c = -1
+          case 4 => d = -1
+        }
+        !otherHasX
+      }
+    }
+
+    // main performance hot spot
+    private def checkXs = {
+      (xs != null) && {
+        val end = xs.length * 64
+
+        while (i < end && { // the block below is true as long as the current `i` has to be skipped
+          val index = i >> 6
+          if (xs(index) == 0L) { // boom. for nullness, this saves 35% of the overall analysis time.
+            i = ((index + 1) << 6) - 1 // -1 required because i is incremented in the loop body
+            true
+          } else {
+            val mask = 1L << i
+            // if (mask > xs(index)) we could also advance i to the next value, but that didn't pay off in benchmarks
+            val thisHasI = (xs(index) & mask) != 0L
+            !thisHasI || {
+              val otherHasI = i == notA || i == notB || i == notC || i == notD || (notXs != null && index < notXs.length && (notXs(index) & mask) != 0L)
+              if (otherHasI) setThisAndOther(i)
+              otherHasI
+            }
+          }
+        }) i += 1
+
+        iValid = i < end
+        iValid
+      }
+    }
+
+    // this is the main hot spot of alias analysis. for nullness, 38% of the overall analysis time
+    // is spent here. within hasNext, almost the entire time is spent in `checkXs`.
+    //
+    def hasNext: Boolean = iValid || abcdNext != -1 || checkABCD(a, 1) || checkABCD(b, 2) || checkABCD(c, 3) || checkABCD(d, 4) || checkXs
+
+    def next(): Int = {
+      if (hasNext) {
+        if (abcdNext != -1) {
+          val r = abcdNext; abcdNext = -1; r
+        } else {
+          val r = i; i += 1; iValid = false; r
+        }
+      } else Iterator.empty.next() // exhausted: delegate to the empty iterator's `next()`
+    }
+  }
+
+//  The number of bits in a bit array. Useful for debugging.
+//  def bsSize(bits: Array[Long]) = {
+//    var r = 0
+//    var i = 0
+//    while (i < bits.length) {
+//      r += java.lang.Long.bitCount(bits(i))
+//      i += 1
+//    }
+//    r
+//  }
+
+  /**
+   * An iterator returning the elements in `a` that are not also in `b` (a &~ b).
+   *
+   * If `thisAndOther` is non-null, the iterator sets thisAndOther(i) to true for every value that
+   * is both in a and b (&); the array is indexed by those values, so it must be long enough for them.
+   */
+  def andNotIterator(a: AliasSet, b: AliasSet, thisAndOther: Array[Boolean]): IntIterator = new AndNotIt(a, b, thisAndOther)
+}
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/BackendUtils.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/BackendUtils.scala
new file mode 100644
index 0000000000..90da570f01
--- /dev/null
+++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/BackendUtils.scala
@@ -0,0 +1,514 @@
+package scala.tools.nsc
+package backend.jvm
+package analysis
+
+import java.lang.invoke.LambdaMetafactory
+
+import scala.annotation.switch
+import scala.collection.JavaConverters._
+import scala.collection.mutable
+import scala.tools.asm.Opcodes._
+import scala.tools.asm.tree._
+import scala.tools.asm.tree.analysis._
+import scala.tools.asm.{Handle, Type}
+import scala.tools.nsc.backend.jvm.BTypes._
+import scala.tools.nsc.backend.jvm.GenBCode._
+import scala.tools.nsc.backend.jvm.opt.BytecodeUtils._
+
+/**
+ * This component hosts tools and utilities used in the backend that require access to a `BTypes`
+ * instance.
+ *
+ * One example is the AsmAnalyzer class, which runs `computeMaxLocalsMaxStack` on the methodNode to
+ * be analyzed. This method in turn lives inside the BTypes assembly because it queries the per-run
+ * cache `maxLocalsMaxStackComputed` defined in there.
+ */
+class BackendUtils[BT <: BTypes](val btypes: BT) {
+  import btypes._
+  import btypes.coreBTypes._
+  import callGraph.ClosureInstantiation
+
+  /**
+   * Wraps ASM's Analyzer: construction computes max locals / stack, then runs the analysis.
+   */
+  class AsmAnalyzer[V <: Value](methodNode: MethodNode, classInternalName: InternalName, val analyzer: Analyzer[V] = new Analyzer(new BasicInterpreter)) {
+    computeMaxLocalsMaxStack(methodNode)
+    // re-throw analyzer failures with the class and method name prepended to ease diagnosis
+    try analyzer.analyze(classInternalName, methodNode)
+    catch {
+      case cause: AnalyzerException =>
+        throw new AnalyzerException(null, s"While processing $classInternalName.${methodNode.name}", cause)
+    }
+    def frameAt(instruction: AbstractInsnNode): Frame[V] = analyzer.frameAt(instruction, methodNode)
+  }
+
+  /**
+   * Size checks deciding whether a method is small enough to be analyzed. See the doc comment on
+   * package object `analysis` for a discussion on performance.
+   */
+  object AsmAnalyzer {
+    // jvm limit is 65535 for both number of instructions and number of locals
+    private def weight(insns: Long, locals: Long): Long = insns * locals * locals
+    private def size(method: MethodNode) = weight(method.instructions.size, method.maxLocals)
+
+    // with the limits below, analysis should not take more than one second
+    private val nullnessSizeLimit    = weight(insns = 5000L, locals = 600L)
+    private val basicValueSizeLimit  = weight(insns = 9000L, locals = 1000L)
+    private val sourceValueSizeLimit = weight(insns = 8000L, locals = 950L)
+
+    def sizeOKForAliasing(method: MethodNode): Boolean = sizeOKForNullness(method)
+    def sizeOKForNullness(method: MethodNode): Boolean = size(method) < nullnessSizeLimit
+    def sizeOKForBasicValue(method: MethodNode): Boolean = size(method) < basicValueSizeLimit
+    def sizeOKForSourceValue(method: MethodNode): Boolean = size(method) < sourceValueSizeLimit
+  }
+  // AsmAnalyzer preconfigured with an InitialProducerSourceInterpreter, mixing in ProdConsAnalyzerImpl
+  class ProdConsAnalyzer(val methodNode: MethodNode, classInternalName: InternalName) extends AsmAnalyzer(methodNode, classInternalName, new Analyzer(new InitialProducerSourceInterpreter)) with ProdConsAnalyzerImpl
+  // AsmAnalyzer preconfigured with a NonLubbingTypeFlowInterpreter
+  class NonLubbingTypeFlowAnalyzer(val methodNode: MethodNode, classInternalName: InternalName) extends AsmAnalyzer(methodNode, classInternalName, new Analyzer(new NonLubbingTypeFlowInterpreter))
+
+  /**
+   * Add:
+   * private static Object $deserializeLambda$(SerializedLambda l) {
+   *   return indy[scala.runtime.LambdaDeserialize.bootstrap](l)
+   * }
+   *
+   * We use invokedynamic here to enable caching within the deserializer without needing to
+   * host a static field in the enclosing class. This allows us to add this method to interfaces
+   * that define lambdas in default methods.
+   */
+  def addLambdaDeserialize(classNode: ClassNode, implMethods: Iterable[Handle]): Unit = {
+    val cw = classNode
+
+    // Make sure that the ClassBTypes of all types that are used in the code generated here (e.g.
+    // java/util/Map) are initialized. Initializing a ClassBType adds it to the
+    // `classBTypeFromInternalName` map. When writing the classfile, the asm ClassWriter computes
+    // stack map frames and invokes the `getCommonSuperClass` method. This method expects all
+    // ClassBTypes mentioned in the source code to exist in the map.
+    // NOTE(review): `nilLookupDesc` is not used below; presumably it only forces such a reference - confirm.
+    val nilLookupDesc = MethodBType(Nil, jliMethodHandlesLookupRef).descriptor
+    val serlamObjDesc = MethodBType(jliSerializedLambdaRef :: Nil, ObjectRef).descriptor
+
+    {
+      val mv = cw.visitMethod(ACC_PRIVATE + ACC_STATIC + ACC_SYNTHETIC, "$deserializeLambda$", serlamObjDesc, null, null)
+      mv.visitCode()
+      mv.visitVarInsn(ALOAD, 0)
+      mv.visitInvokeDynamicInsn("lambdaDeserialize", serlamObjDesc, lambdaDeserializeBootstrapHandle, implMethods.toArray: _*)
+      mv.visitInsn(ARETURN)
+      mv.visitEnd()
+    }
+  }
+
+  /**
+   * Clone the instructions in `methodNode` into a new [[InsnList]], mapping labels according to
+   * `labelMap`; LineNumberNodes are skipped unless `keepLineNumbers`. Returns the new list, a map
+   * from old to new instructions, and the implementation method handles referenced by
+   * invokedynamic[LambdaMetafactory] instructions with the FLAG_SERIALIZABLE flag set.
+   */
+  def cloneInstructions(methodNode: MethodNode, labelMap: Map[LabelNode, LabelNode], keepLineNumbers: Boolean): (InsnList, Map[AbstractInsnNode, AbstractInsnNode], List[Handle]) = {
+    val javaLabelMap = labelMap.asJava
+    val result = new InsnList
+    var map = Map.empty[AbstractInsnNode, AbstractInsnNode]
+    val inlinedTargetHandles = mutable.ListBuffer[Handle]()
+    for (ins <- methodNode.instructions.iterator.asScala) {
+      ins match {
+        case callGraph.LambdaMetaFactoryCall(indy, _, _, _) => indy.bsmArgs match {
+          case Array(_, targetHandle: Handle, _, flags: Integer, _*) if (flags.intValue & LambdaMetafactory.FLAG_SERIALIZABLE) != 0 =>
+            inlinedTargetHandles += targetHandle
+          case _ =>
+        }
+        case _ =>
+      }
+      if (keepLineNumbers || !ins.isInstanceOf[LineNumberNode]) {
+        val cloned = ins.clone(javaLabelMap)
+        result add cloned
+        map += ((ins, cloned))
+      }
+    }
+    (result, map, inlinedTargetHandles.toList)
+  }
+
+  def getBoxedUnit: FieldInsnNode = new FieldInsnNode(GETSTATIC, srBoxedUnitRef.internalName, "UNIT", srBoxedUnitRef.descriptor)
+
+  private val anonfunAdaptedName = """.*\$anonfun\$.*\$\d+\$adapted""".r
+  // true if the name of the closure's implementation method matches `anonfunAdaptedName`
+  def hasAdaptedImplMethod(closureInit: ClosureInstantiation): Boolean =
+    anonfunAdaptedName.pattern.matcher(closureInit.lambdaMetaFactoryCall.implMethod.getName).matches
+
+  private def primitiveAsmTypeToBType(primitiveType: Type): PrimitiveBType = (primitiveType.getSort: @switch) match {
+    case Type.BOOLEAN => BOOL
+    case Type.BYTE    => BYTE
+    case Type.CHAR    => CHAR
+    case Type.SHORT   => SHORT
+    case Type.INT     => INT
+    case Type.LONG    => LONG
+    case Type.FLOAT   => FLOAT
+    case Type.DOUBLE  => DOUBLE
+    case _            => null // every other sort (e.g. void, array, object) is mapped to null
+  }
+
+  def isScalaBox(insn: MethodInsnNode): Boolean = {
+    def boxesTheArgument = Type.getArgumentTypes(insn.desc) match {
+      case Array(argType) => srBoxesRuntimeBoxToMethods.get(primitiveAsmTypeToBType(argType)).exists {
+        case MethodNameAndType(name, tp) => name == insn.name && tp.descriptor == insn.desc
+      }
+      case _ => false
+    }
+    insn.owner == srBoxesRunTimeRef.internalName && boxesTheArgument
+  }
+
+  def getScalaBox(primitiveType: Type): MethodInsnNode = {
+    // look up the box method registered in `srBoxesRuntimeBoxToMethods` for this primitive type
+    val MethodNameAndType(boxName, boxType) = srBoxesRuntimeBoxToMethods(primitiveAsmTypeToBType(primitiveType))
+    new MethodInsnNode(INVOKESTATIC, srBoxesRunTimeRef.internalName, boxName, boxType.descriptor, /*itf =*/ false)
+  }
+
+  def isScalaUnbox(insn: MethodInsnNode): Boolean = {
+    def unboxMethodForReturnType = srBoxesRuntimeUnboxToMethods.get(primitiveAsmTypeToBType(Type.getReturnType(insn.desc)))
+    insn.owner == srBoxesRunTimeRef.internalName && unboxMethodForReturnType.exists {
+      case MethodNameAndType(name, tp) => name == insn.name && tp.descriptor == insn.desc
+    }
+  }
+
+  def getScalaUnbox(primitiveType: Type): MethodInsnNode = {
+    // look up the unbox method registered in `srBoxesRuntimeUnboxToMethods` for this primitive type
+    val MethodNameAndType(unboxName, unboxType) = srBoxesRuntimeUnboxToMethods(primitiveAsmTypeToBType(primitiveType))
+    new MethodInsnNode(INVOKESTATIC, srBoxesRunTimeRef.internalName, unboxName, unboxType.descriptor, /*itf =*/ false)
+  }
+
+  private def calleeInMap(insn: MethodInsnNode, map: Map[InternalName, MethodNameAndType]): Boolean =
+    map.get(insn.owner).exists {
+      case MethodNameAndType(name, tp) => insn.name == name && insn.desc == tp.descriptor
+    }
+
+  def isJavaBox(insn: MethodInsnNode): Boolean = calleeInMap(insn, javaBoxMethods)
+  def isJavaUnbox(insn: MethodInsnNode): Boolean = calleeInMap(insn, javaUnboxMethods)
+
+  def isPredefAutoBox(insn: MethodInsnNode): Boolean = {
+    // the map is looked up by method name, the descriptor is compared afterwards
+    def descriptorMatches =
+      predefAutoBoxMethods.get(insn.name).exists(tp => insn.desc == tp.descriptor)
+    insn.owner == PredefRef.internalName && descriptorMatches
+  }
+
+  def isPredefAutoUnbox(insn: MethodInsnNode): Boolean = {
+    // the map is looked up by method name, the descriptor is compared afterwards
+    def descriptorMatches =
+      predefAutoUnboxMethods.get(insn.name).exists(tp => insn.desc == tp.descriptor)
+    insn.owner == PredefRef.internalName && descriptorMatches
+  }
+
+  def isRefCreate(insn: MethodInsnNode): Boolean = calleeInMap(insn, srRefCreateMethods)
+  def isRefZero(insn: MethodInsnNode): Boolean = calleeInMap(insn, srRefZeroMethods)
+  // type of the first parameter of the create method registered for `refClass` in `srRefCreateMethods`
+  def runtimeRefClassBoxedType(refClass: InternalName): Type = Type.getArgumentTypes(srRefCreateMethods(refClass).methodType.descriptor)(0)
+
+  def isSideEffectFreeCall(insn: MethodInsnNode): Boolean = {
+    // java unbox methods are deliberately excluded, they may NPE
+    def isSafeBoxingCall = isScalaBox(insn) || isScalaUnbox(insn) || isJavaBox(insn)
+    isSafeBoxingCall || isSideEffectFreeConstructorCall(insn)
+  }
+
+  def isNonNullMethodInvocation(mi: MethodInsnNode): Boolean =
+    // boxing methods and the Ref create / zero methods are the invocations treated as non-null
+    isJavaBox(mi) || isScalaBox(mi) || isPredefAutoBox(mi) || isRefCreate(mi) || isRefZero(mi)
+
+  def isModuleLoad(insn: AbstractInsnNode, moduleName: InternalName): Boolean = insn match {
+    case fi: FieldInsnNode if fi.getOpcode == GETSTATIC && fi.owner == moduleName => fi.name == "MODULE$" && fi.desc == s"L$moduleName;"
+    case _ => false
+  }
+
+  def isPredefLoad(insn: AbstractInsnNode) = isModuleLoad(insn, PredefRef.internalName)
+  // constructor predicates: the callee is looked up by its owner in the respective constructor map
+  def isPrimitiveBoxConstructor(insn: MethodInsnNode): Boolean = calleeInMap(insn, primitiveBoxConstructors)
+  def isRuntimeRefConstructor(insn: MethodInsnNode): Boolean = calleeInMap(insn, srRefConstructors)
+  def isTupleConstructor(insn: MethodInsnNode): Boolean = calleeInMap(insn, tupleClassConstructors)
+
+  // (owner, descriptor) pairs. unused objects created by these constructors are eliminated by pushPop
+  private lazy val sideEffectFreeConstructors: Set[(String, String)] = {
+    val ownerDesc = (p: (InternalName, MethodNameAndType)) => (p._1, p._2.methodType.descriptor)
+    primitiveBoxConstructors.map(ownerDesc).toSet ++
+      srRefConstructors.map(ownerDesc) ++
+      tupleClassConstructors.map(ownerDesc) ++ Set(
+        (ObjectRef.internalName, MethodBType(Nil, UNIT).descriptor), // ObjectRef, no parameters
+        (StringRef.internalName, MethodBType(Nil, UNIT).descriptor), // StringRef, no parameters
+        (StringRef.internalName, MethodBType(List(StringRef), UNIT).descriptor), // StringRef, one StringRef parameter
+        (StringRef.internalName, MethodBType(List(ArrayBType(CHAR)), UNIT).descriptor)) // StringRef, one CHAR array parameter
+  }
+
+  def isSideEffectFreeConstructorCall(insn: MethodInsnNode): Boolean =
+    // the set holds (owner, descriptor) pairs, so the method name is checked separately
+    insn.name == INSTANCE_CONSTRUCTOR_NAME && sideEffectFreeConstructors.contains((insn.owner, insn.desc))
+
+  private lazy val classesOfSideEffectFreeConstructors = sideEffectFreeConstructors.map(_._1)
+
+  def isNewForSideEffectFreeConstructor(insn: AbstractInsnNode) =
+    if (insn.getOpcode != NEW) false
+    else insn match {
+      // asm represents NEW instructions as TypeInsnNode
+      case ti: TypeInsnNode => classesOfSideEffectFreeConstructors.contains(ti.desc)
+    }
+
+  def isBoxedUnit(insn: AbstractInsnNode) =
+    if (insn.getOpcode != GETSTATIC) false
+    else insn match {
+      // asm represents GETSTATIC instructions as FieldInsnNode
+      case fi: FieldInsnNode => fi.owner == srBoxedUnitRef.internalName && fi.name == "UNIT" && fi.desc == srBoxedUnitRef.descriptor
+    }
+
+  /**
+   * Visit the class node and collect all referenced nested classes.
+   */
+  def collectNestedClasses(classNode: ClassNode): List[ClassBType] = {
+    val innerClasses = mutable.Set.empty[ClassBType]
+
+    def visitInternalName(internalName: InternalName): Unit = if (internalName != null) {
+      val t = classBTypeFromParsedClassfile(internalName)
+      if (t.isNestedClass.get) innerClasses += t
+    }
+
+    // either an internal/Name or [[Linternal/Name; -- there are certain references in classfiles
+    // that are either an internal name (without the surrounding `L;`) or an array descriptor
+    // `[Linternal/Name;`.
+    def visitInternalNameOrArrayReference(ref: String): Unit = if (ref != null) {
+      val bracket = ref.lastIndexOf('[')
+      if (bracket == -1) visitInternalName(ref)
+      else if (ref.charAt(bracket + 1) == 'L') visitInternalName(ref.substring(bracket + 2, ref.length - 1))
+    }
+
+    // we are only interested in the class references in the descriptor, so we can skip over
+    // primitives and the brackets of array descriptors
+    def visitDescriptor(desc: String): Unit = (desc.charAt(0): @switch) match {
+      case '(' =>
+        val internalNames = mutable.ListBuffer.empty[String]
+        var i = 1
+        while (i < desc.length) {
+          if (desc.charAt(i) == 'L') {
+            val start = i + 1 // skip the L
+            while (desc.charAt(i) != ';') i += 1
+            internalNames append desc.substring(start, i)
+          }
+          // skips over '[', ')', primitives
+          i += 1
+        }
+        internalNames foreach visitInternalName
+
+      case 'L' =>
+        visitInternalName(desc.substring(1, desc.length - 1))
+
+      case '[' =>
+        visitInternalNameOrArrayReference(desc)
+
+      case _ => // skip over primitive types
+    }
+
+    def visitConstant(const: AnyRef): Unit = const match {
+      case t: Type => visitDescriptor(t.getDescriptor)
+      case _ =>
+    }
+
+    // in principle we could skip references to annotation types, as they only end up as strings in
+    // the constant pool, not as class references. however, the java compiler still includes nested
+    // annotation classes in the innerClass table, so we do the same. explained in detail in the
+    // large comment in class BTypes.
+    def visitAnnotation(annot: AnnotationNode): Unit = {
+      visitDescriptor(annot.desc)
+      if (annot.values != null) annot.values.asScala foreach visitConstant
+    }
+
+    def visitAnnotations(annots: java.util.List[_ <: AnnotationNode]) = if (annots != null) annots.asScala foreach visitAnnotation
+    def visitAnnotationss(annotss: Array[java.util.List[AnnotationNode]]) = if (annotss != null) annotss foreach visitAnnotations
+
+    def visitHandle(handle: Handle): Unit = {
+      visitInternalNameOrArrayReference(handle.getOwner)
+      visitDescriptor(handle.getDesc)
+    }
+
+    visitInternalName(classNode.name)
+    innerClasses ++= classBTypeFromParsedClassfile(classNode.name).info.get.nestedClasses
+
+    visitInternalName(classNode.superName)
+    classNode.interfaces.asScala foreach visitInternalName
+    visitInternalName(classNode.outerClass)
+
+    visitAnnotations(classNode.visibleAnnotations)
+    visitAnnotations(classNode.visibleTypeAnnotations)
+    visitAnnotations(classNode.invisibleAnnotations)
+    visitAnnotations(classNode.invisibleTypeAnnotations)
+
+    for (f <- classNode.fields.asScala) {
+      visitDescriptor(f.desc)
+      visitAnnotations(f.visibleAnnotations)
+      visitAnnotations(f.visibleTypeAnnotations)
+      visitAnnotations(f.invisibleAnnotations)
+      visitAnnotations(f.invisibleTypeAnnotations)
+    }
+
+    for (m <- classNode.methods.asScala) {
+      visitDescriptor(m.desc)
+
+      visitAnnotations(m.visibleAnnotations)
+      visitAnnotations(m.visibleTypeAnnotations)
+      visitAnnotations(m.invisibleAnnotations)
+      visitAnnotations(m.invisibleTypeAnnotations)
+      visitAnnotationss(m.visibleParameterAnnotations)
+      visitAnnotationss(m.invisibleParameterAnnotations)
+      visitAnnotations(m.visibleLocalVariableAnnotations)
+      visitAnnotations(m.invisibleLocalVariableAnnotations)
+
+      m.exceptions.asScala foreach visitInternalName
+      for (tcb <- m.tryCatchBlocks.asScala) visitInternalName(tcb.`type`)
+
+      val iter = m.instructions.iterator()
+      while (iter.hasNext) iter.next() match {
+        case ti: TypeInsnNode           => visitInternalNameOrArrayReference(ti.desc)
+        case fi: FieldInsnNode          => visitInternalNameOrArrayReference(fi.owner); visitDescriptor(fi.desc)
+        case mi: MethodInsnNode         => visitInternalNameOrArrayReference(mi.owner); visitDescriptor(mi.desc)
+        case id: InvokeDynamicInsnNode  => visitDescriptor(id.desc); visitHandle(id.bsm); id.bsmArgs foreach visitConstant
+        case ci: LdcInsnNode            => visitConstant(ci.cst)
+        case ma: MultiANewArrayInsnNode => visitDescriptor(ma.desc)
+        case _ =>
+      }
+    }
+    innerClasses.toList
+  }
+
+  /**
+   * In order to run an Analyzer, the maxLocals / maxStack fields need to be available. The ASM
+   * framework only computes these values during bytecode generation.
+   *
+   * NOTE 1: as explained in the `analysis` package object, the maxStack value used by the Analyzer
+   * may be smaller than the correct maxStack value in the classfile (Analyzers only use a single
+   * slot for long / double values). The maxStack value computed here is correct for running an
+   * analyzer, but not for writing in the classfile. We let the ClassWriter recompute max's.
+   *
+   * NOTE 2: the maxStack value computed here may be larger than the smallest correct value
+   * that would allow running an analyzer, see `InstructionStackEffect.forAsmAnalysis` and
+   * `InstructionStackEffect.maxStackGrowth`.
+   *
+   * NOTE 3: the implementation doesn't look at instructions that cannot be reached, it computes
+   * the max local / stack size in the reachable code. These max's work just fine for running an
+   * Analyzer: its implementation also skips over unreachable code in the same way.
+   */
+  def computeMaxLocalsMaxStack(method: MethodNode): Unit = {
+    if (isAbstractMethod(method) || isNativeMethod(method)) {
+      method.maxLocals = 0
+      method.maxStack = 0
+    } else if (!maxLocalsMaxStackComputed(method)) {
+      val size = method.instructions.size
+
+      var maxLocals = parametersSize(method)
+      var maxStack = 0
+
+      // queue of instruction indices where analysis should start (processed in LIFO order)
+      var queue = new Array[Int](8)
+      var top = -1
+      def enq(i: Int): Unit = {
+        if (top == queue.length - 1) {
+          val nq = new Array[Int](queue.length * 2)
+          Array.copy(queue, 0, nq, 0, queue.length)
+          queue = nq
+        }
+        top += 1
+        queue(top) = i
+      }
+      def deq(): Int = {
+        val r = queue(top)
+        top -= 1
+        r
+      }
+
+      val subroutineRetTargets = new mutable.Stack[AbstractInsnNode]
+
+      // for each instruction in the queue, contains the stack height at this instruction.
+      // once an instruction has been treated, contains -1 to prevent re-enqueuing
+      val stackHeights = new Array[Int](size)
+
+      def enqInsn(insn: AbstractInsnNode, height: Int): Unit = {
+        enqInsnIndex(method.instructions.indexOf(insn), height)
+      }
+
+      def enqInsnIndex(insnIndex: Int, height: Int): Unit = {
+        if (insnIndex < size && stackHeights(insnIndex) != -1) {
+          stackHeights(insnIndex) = height
+          enq(insnIndex)
+        }
+      }
+
+      val tcbIt = method.tryCatchBlocks.iterator()
+      while (tcbIt.hasNext) {
+        val tcb = tcbIt.next()
+        enqInsn(tcb.handler, 1) // a handler starts with one value (the exception) on the stack
+        if (maxStack == 0) maxStack = 1
+      }
+
+      enq(0)
+      while (top != -1) {
+        val insnIndex = deq()
+        val insn = method.instructions.get(insnIndex)
+        val initHeight = stackHeights(insnIndex)
+        stackHeights(insnIndex) = -1 // prevent insnIndex from being enqueued again
+
+        if (insn.getOpcode == -1) { // frames, labels, line numbers
+          enqInsnIndex(insnIndex + 1, initHeight)
+        } else {
+          val stackGrowth = InstructionStackEffect.maxStackGrowth(insn)
+          val heightAfter = initHeight + stackGrowth
+          if (heightAfter > maxStack) maxStack = heightAfter
+
+          // update maxLocals
+          insn match {
+            case v: VarInsnNode =>
+              val longSize = if (isSize2LoadOrStore(v.getOpcode)) 1 else 0
+              maxLocals = math.max(maxLocals, v.`var` + longSize + 1) // + 1 because local numbers are 0-based
+
+            case i: IincInsnNode =>
+              maxLocals = math.max(maxLocals, i.`var` + 1)
+
+            case _ =>
+          }
+
+          insn match {
+            case j: JumpInsnNode =>
+              if (j.getOpcode == JSR) {
+                val jsrTargetHeight = heightAfter + 1 // the subroutine starts with the return address on the stack
+                if (jsrTargetHeight > maxStack) maxStack = jsrTargetHeight
+                subroutineRetTargets.push(j.getNext)
+                enqInsn(j.label, jsrTargetHeight)
+              } else {
+                enqInsn(j.label, heightAfter)
+                val opc = j.getOpcode
+                if (opc != GOTO) enqInsnIndex(insnIndex + 1, heightAfter) // jump is conditional, so the successor is also a possible control flow target
+              }
+
+            case l: LookupSwitchInsnNode =>
+              var j = 0
+              while (j < l.labels.size) {
+                enqInsn(l.labels.get(j), heightAfter); j += 1
+              }
+              enqInsn(l.dflt, heightAfter)
+
+            case t: TableSwitchInsnNode =>
+              var j = 0
+              while (j < t.labels.size) {
+                enqInsn(t.labels.get(j), heightAfter); j += 1
+              }
+              enqInsn(t.dflt, heightAfter)
+
+            case r: VarInsnNode if r.getOpcode == RET =>
+              enqInsn(subroutineRetTargets.pop(), heightAfter)
+
+            case _ =>
+              val opc = insn.getOpcode
+              if (opc != ATHROW && !isReturn(insn))
+                enqInsnIndex(insnIndex + 1, heightAfter)
+          }
+        }
+      }
+
+      method.maxLocals = maxLocals
+      method.maxStack = maxStack
+
+      maxLocalsMaxStackComputed += method
+    }
+  }
+}
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/InstructionStackEffect.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/InstructionStackEffect.scala
index 8d8ea839e6..dd19ad594f 100644
--- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/InstructionStackEffect.scala
+++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/InstructionStackEffect.scala
@@ -5,35 +5,74 @@ package analysis
 import scala.annotation.switch
 import scala.tools.asm.Opcodes._
 import scala.tools.asm.Type
-import scala.tools.asm.tree.{MultiANewArrayInsnNode, InvokeDynamicInsnNode, MethodInsnNode, AbstractInsnNode}
+import scala.tools.asm.tree._
 import scala.tools.asm.tree.analysis.{Frame, Value}
 import opt.BytecodeUtils._
-import collection.immutable
 
 object InstructionStackEffect {
-  private var cache: immutable.IntMap[(Int, Int)] = immutable.IntMap.empty
-  private def t(x: Int, y: Int): (Int, Int) = {
-    // x can go up to 255 (number of parameters of a method, dimensions in multianewarray) we cache
-    // x up to 10, which covers most cases and limits the cache. y doesn't go above 6 (see cases).
-    if (x > 10 || y > 6) (x, y)
-    else {
-      val key = (x << 8) + y // this would work for any x < 256
-      if (cache contains key) {
-        cache(key)
-      } else {
-        val r = (x, y)
-        cache += key -> r
-        r
-      }
-    }
+  val consShift = 3
+  val prodMask = (1 << consShift) - 1
+  // decoders for the Int built by `t`: `cons` is stored in the bits above `consShift`, `prod` below
+  def cons(i: Int) = i >>> consShift
+  def prod(i: Int) = i & prodMask
+  // packs the consumed (x) and produced (y) counts into one Int; y has to fit into `prodMask`
+  private def t(x: Int, y: Int): Int = (x << consShift) | y
+
+  /**
+   * Returns the number of stack values consumed and produced by `insn`, encoded in a single `Int`
+   * (the `cons` / `prod` methods extract the individual values). The returned values are correct
+   * for use in asm's Analyzer framework. For example, a LLOAD instruction produces one stack
+   * value. See also doc in `analysis` package object.
+   *
+   * This method requires the `frame` to be in the state **before** executing / interpreting the
+   * `insn`.
+   */
+  def forAsmAnalysis[V <: Value](insn: AbstractInsnNode, frame: Frame[V]): Int = computeConsProd(insn, forClassfile = false, conservative = false, frame = frame)
+
+  /**
+   * Returns an upper bound for the stack growth caused by executing `insn`. The value usually
+   * matches what asm's Analyzer framework expects, but it may be larger. For example, consider
+   * a POP2 instruction:
+   *   - if two size-1 values are popped, then the asm Analyzer consumes two values
+   *   - if a size-2 value is popped, the asm Analyzer consumes only one stack slot (see doc in the
+   *     `analysis` package object)
+   *
+   * If a precise result is needed, invoke the `forAsmAnalysis` and provide a `frame` value that
+   * allows looking up the sizes of values on the stack.
+   */
+  def maxStackGrowth(insn: AbstractInsnNode): Int = {
+    val consProd = computeConsProd(insn, forClassfile = false, conservative = true)
+    prod(consProd) - cons(consProd)
   }
 
   /**
-   * Returns a pair with the number of stack values consumed and produced by `insn`.
-   * This method requires the `frame` to be in the state **before** executing / interpreting
-   * the `insn`.
+   * Returns the number of stack values consumed and produced by `insn`, encoded in a single `Int`
+   * (the `cons` / `prod` methods extract the individual values). The returned values are correct
+   * for writing into a classfile (see doc on the `analysis` package object).
    */
-  def apply[V <: Value](insn: AbstractInsnNode, frame: Frame[V]): (Int, Int) = {
+  def forClassfile(insn: AbstractInsnNode): Int = computeConsProd(insn, forClassfile = true, conservative = false)
+
+  private def invokeConsProd(methodDesc: String, insn: AbstractInsnNode, forClassfile: Boolean): Int = {
+    val opcode = insn.getOpcode
+    val hasReceiver = opcode != INVOKESTATIC && opcode != INVOKEDYNAMIC
+    if (!forClassfile) {
+      val consumed = Type.getArgumentTypes(methodDesc).length + (if (hasReceiver) 1 else 0)
+      val produced = if (Type.getReturnType(methodDesc) == Type.VOID_TYPE) 0 else 1
+      t(consumed, produced)
+    } else {
+      val sizes = Type.getArgumentsAndReturnSizes(methodDesc)
+      // the argument size reported by asm already counts the receiver
+      t((sizes >> 2) - (if (hasReceiver) 0 else 1), sizes & 0x03)
+    }
+  }
+
+  private def fieldInsnIsLongOrDouble(insn: AbstractInsnNode) = insn.asInstanceOf[FieldInsnNode].desc match {
+    case "J" | "D" => true // the descriptors of long and double
+    case _         => false
+  }
+
+  private def computeConsProd[V <: Value](insn: AbstractInsnNode, forClassfile: Boolean, conservative: Boolean, frame: Frame[V] = null): Int = {
+    // not used if `forClassfile || conservative`: in these cases, `frame` is allowed to be `null`
     def peekStack(n: Int): V = frame.peekStack(n)
 
     (insn.getOpcode: @switch) match {
@@ -48,142 +87,176 @@ object InstructionStackEffect {
            ICONST_3 |
            ICONST_4 |
            ICONST_5 |
-           LCONST_0 |
-           LCONST_1 |
            FCONST_0 |
            FCONST_1 |
            FCONST_2 |
-           DCONST_0 |
-           DCONST_1 |
            BIPUSH |
            SIPUSH |
-           LDC |
            ILOAD |
-           LLOAD |
            FLOAD |
-           DLOAD |
            ALOAD => t(0, 1)
 
+      case LDC =>
+        if (forClassfile) insn.asInstanceOf[LdcInsnNode].cst match {
+          case _: java.lang.Long | _: java.lang.Double => t(0, 2)
+          case _ => t(0, 1)
+        } else
+          t(0, 1)
+
+      case LCONST_0 |
+           LCONST_1 |
+           DCONST_0 |
+           DCONST_1 |
+           LLOAD |
+           DLOAD => if (forClassfile) t(0, 2) else t(0, 1)
+
       case IALOAD |
-           LALOAD |
            FALOAD |
-           DALOAD |
            AALOAD |
            BALOAD |
            CALOAD |
            SALOAD => t(2, 1)
 
+      case LALOAD |
+           DALOAD => if (forClassfile) t(2, 2) else t(2, 1)
+
       case ISTORE |
-           LSTORE |
            FSTORE |
-           DSTORE |
            ASTORE => t(1, 0)
 
+      case LSTORE |
+           DSTORE => if (forClassfile) t(2, 0) else t(1, 0)
+
       case IASTORE |
-           LASTORE |
            FASTORE |
-           DASTORE |
            AASTORE |
            BASTORE |
            CASTORE |
            SASTORE => t(3, 0)
 
+      case LASTORE |
+           DASTORE => if (forClassfile) t(4, 0) else t(3, 0)
+
       case POP => t(1, 0)
 
       case POP2 =>
-        val isSize2 = peekStack(0).getSize == 2
-        if (isSize2) t(1, 0) else t(2, 0)
+        if (forClassfile) t(2, 0)
+        else if (conservative) t(1, 0)
+        else {
+          val isSize2 = peekStack(0).getSize == 2
+          if (isSize2) t(1, 0) else t(2, 0)
+        }
 
       case DUP => t(1, 2)
 
       case DUP_X1 => t(2, 3)
 
       case DUP_X2 =>
-        val isSize2 = peekStack(1).getSize == 2
-        if (isSize2) t(2, 3) else t(3, 4)
+        if (forClassfile || conservative) t(3, 4)
+        else {
+          val isSize2 = peekStack(1).getSize == 2
+          if (isSize2) t(2, 3) else t(3, 4)
+        }
 
       case DUP2 =>
-        val isSize2 = peekStack(0).getSize == 2
-        if (isSize2) t(1, 2) else t(2, 4)
+        if (forClassfile || conservative) t(2, 4)
+        else {
+          val isSize2 = peekStack(0).getSize == 2
+          if (isSize2) t(1, 2) else t(2, 4)
+        }
 
       case DUP2_X1 =>
-        val isSize2 = peekStack(0).getSize == 2
-        if (isSize2) t(2, 3) else t(3, 4)
+        if (forClassfile || conservative) t(3, 5)
+        else {
+          val isSize2 = peekStack(0).getSize == 2
+          if (isSize2) t(2, 3) else t(3, 5)
+        }
 
       case DUP2_X2 =>
-        val v1isSize2 = peekStack(0).getSize == 2
-        if (v1isSize2) {
-          val v2isSize2 = peekStack(1).getSize == 2
-          if (v2isSize2) t(2, 3) else t(3, 4)
-        } else {
-          val v3isSize2 = peekStack(2).getSize == 2
-          if (v3isSize2) t(3, 5) else t(4, 6)
+        if (forClassfile || conservative) t(4, 6)
+        else {
+          val v1isSize2 = peekStack(0).getSize == 2
+          if (v1isSize2) {
+            val v2isSize2 = peekStack(1).getSize == 2
+            if (v2isSize2) t(2, 3) else t(3, 4)
+          } else {
+            val v3isSize2 = peekStack(2).getSize == 2
+            if (v3isSize2) t(3, 5) else t(4, 6)
+          }
         }
 
       case SWAP => t(2, 2)
 
       case IADD |
-           LADD |
            FADD |
-           DADD |
            ISUB |
-           LSUB |
            FSUB |
-           DSUB |
            IMUL |
-           LMUL |
            FMUL |
-           DMUL |
            IDIV |
-           LDIV |
            FDIV |
-           DDIV |
            IREM |
+           FREM => t(2, 1)
+
+      case LADD |
+           DADD |
+           LSUB |
+           DSUB |
+           LMUL |
+           DMUL |
+           LDIV |
+           DDIV |
            LREM |
-           FREM |
-           DREM => t(2, 1)
+           DREM => if (forClassfile) t(4, 2) else t(2, 1)
 
       case INEG |
-           LNEG |
-           FNEG |
-           DNEG => t(1, 1)
+           FNEG => t(1, 1)
+
+      case LNEG |
+           DNEG => if (forClassfile) t(2, 2) else t(1, 1)
 
       case ISHL |
-           LSHL |
            ISHR |
-           LSHR |
            IUSHR |
-           LUSHR |
            IAND |
-           LAND |
            IOR |
+           IXOR => t(2, 1)
+
+      case LSHL |
+           LSHR |
+           LUSHR => if (forClassfile) t(3, 2) else t(2, 1)
+
+      case LAND |
            LOR |
-           IXOR |
-           LXOR => t(2, 1)
+           LXOR => if (forClassfile) t(4, 2) else t(2, 1)
 
       case IINC => t(0, 0)
 
-      case I2L |
-           I2F |
-           I2D |
-           L2I |
-           L2F |
-           L2D |
+      case I2F |
            F2I |
-           F2L |
-           F2D |
-           D2I |
-           D2L |
-           D2F |
            I2B |
            I2C |
            I2S => t(1, 1)
 
+      case I2L |
+           I2D |
+           F2L |
+           F2D => if (forClassfile) t(1, 2) else t(1, 1)
+
+      case L2I |
+           L2F |
+           D2I |
+           D2F => if (forClassfile) t(2, 1) else t(1, 1)
+
+      case L2D |
+           D2L => if (forClassfile) t(2, 2) else t(1, 1)
+
+      case FCMPL |
+           FCMPG => t(2, 1)
+
       case LCMP |
-           FCMPL |
-           FCMPG |
            DCMPL |
-           DCMPG => t(2, 1)
+           DCMPG => if (forClassfile) t(4, 1) else t(2, 1)
 
       case IFEQ |
            IFNE |
@@ -211,35 +284,36 @@ object InstructionStackEffect {
            LOOKUPSWITCH => t(1, 0)
 
       case IRETURN |
-           LRETURN |
            FRETURN |
-           DRETURN |
            ARETURN => t(1, 0) // Frame.execute consumes one stack value
 
+      case LRETURN |
+           DRETURN => if (forClassfile) t(2, 0) else t(1, 0)
+
       case RETURN => t(0, 0) // Frame.execute does not change the stack
 
-      case GETSTATIC => t(0, 1)
+      case GETSTATIC =>
+        val prod = if (forClassfile && fieldInsnIsLongOrDouble(insn)) 2 else 1
+        t(0, prod)
 
-      case PUTSTATIC => t(1, 0)
+      case PUTSTATIC =>
+        val cons = if (forClassfile && fieldInsnIsLongOrDouble(insn)) 2 else 1
+        t(cons, 0)
 
-      case GETFIELD => t(1, 1)
+      case GETFIELD =>
+        val prod = if (forClassfile && fieldInsnIsLongOrDouble(insn)) 2 else 1
+        t(1, prod)
 
-      case PUTFIELD => t(2, 0)
+      case PUTFIELD =>
+        val cons = if (forClassfile && fieldInsnIsLongOrDouble(insn)) 3 else 2
+        t(cons, 0)
 
       case INVOKEVIRTUAL |
            INVOKESPECIAL |
            INVOKESTATIC |
-           INVOKEINTERFACE =>
-        val desc = insn.asInstanceOf[MethodInsnNode].desc
-        val cons = Type.getArgumentTypes(desc).length + (if (insn.getOpcode == INVOKESTATIC) 0 else 1)
-        val prod = if (Type.getReturnType(desc) == Type.VOID_TYPE) 0 else 1
-        t(cons, prod)
-
-      case INVOKEDYNAMIC =>
-        val desc = insn.asInstanceOf[InvokeDynamicInsnNode].desc
-        val cons = Type.getArgumentTypes(desc).length
-        val prod = if (Type.getReturnType(desc) == Type.VOID_TYPE) 0 else 1
-        t(cons, prod)
+           INVOKEINTERFACE => invokeConsProd(insn.asInstanceOf[MethodInsnNode].desc, insn, forClassfile)
+
+      case INVOKEDYNAMIC => invokeConsProd(insn.asInstanceOf[InvokeDynamicInsnNode].desc, insn, forClassfile)
 
       case NEW => t(0, 1)
 
@@ -261,5 +335,4 @@ object InstructionStackEffect {
            IFNONNULL => t(1, 0)
     }
   }
-
 }
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzer.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzer.scala
index 31b62f747e..01afd0d2ef 100644
--- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzer.scala
+++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzer.scala
@@ -5,68 +5,14 @@ package analysis
 import java.util
 
 import scala.annotation.switch
-import scala.tools.asm.{Type, Opcodes}
-import scala.tools.asm.tree.{MethodInsnNode, LdcInsnNode, AbstractInsnNode}
-import scala.tools.asm.tree.analysis.{Frame, Analyzer, Interpreter, Value}
+import scala.tools.asm.{Opcodes, Type}
+import scala.tools.asm.tree.{AbstractInsnNode, LdcInsnNode, MethodInsnNode, MethodNode}
+import scala.tools.asm.tree.analysis._
 import scala.tools.nsc.backend.jvm.opt.BytecodeUtils
 import BytecodeUtils._
 
 /**
- * Some notes on the ASM analyzer framework.
- *
- * Value
- *  - Abstract, needs to be implemented for each analysis.
- *  - Represents the desired information about local variables and stack values, for example:
- *    - Is this value known to be null / not null?
- *    - What are the instructions that could potentially have produced this value?
- *
- * Interpreter
- *  - Abstract, needs to be implemented for each analysis. Sometimes one can subclass an existing
- *    interpreter, e.g., SourceInterpreter or BasicInterpreter.
- *  - Multiple abstract methods that receive an instruction and the instruction's input values, and
- *    return a value representing the result of that instruction.
- *    - Note: due to control flow, the interpreter can be invoked multiple times for the same
- *      instruction, until reaching a fixed point.
- *  - Abstract `merge` function that computes the least upper bound of two values. Used by
- *    Frame.merge (see below).
- *
- * Frame
- *  - Can be used directly for many analyses, no subclass required.
- *  - Every frame has an array of values: one for each local variable and for each stack slot.
- *    - A `top` index stores the index of the current stack top
- *    - NOTE: for a size-2 local variable at index i, the local variable at i+1 is set to an empty
- *      value. However, for a size-2 value at index i on the stack, the value at i+1 holds the next
- *      stack value.
- *  - Defines the `execute(instruction)` method.
- *    - executing mutates the state of the frame according to the effect of the instruction
- *      - pop consumed values from the stack
- *      - pass them to the interpreter together with the instruction
- *      - if applicable, push the resulting value on the stack
- *  - Defines the `merge(otherFrame)` method
- *    - called by the analyzer when multiple control flow paths lead to an instruction
- *      - the frame at the branching instruction is merged into the current frame of the
- *        instruction (held by the analyzer)
- *      - mutates the values of the current frame, merges all values using interpreter.merge.
- *
- * Analyzer
- *   - Stores a frame for each instruction
- *   - `merge` function takes an instruction and a frame, merges the existing frame for that instr
- *     (from the frames array) with the new frame passed as argument.
- *     if the frame changed, puts the instruction on the work queue (fixpiont).
- *   - initial frame: initialized for first instr by calling interpreter.new[...]Value
- *     for each slot (locals and params), stored in frames[firstInstr] by calling `merge`
- *   - work queue of instructions (`queue` array, `top` index for next instruction to analyze)
- *   - analyze(method): simulate control flow. while work queue non-empty:
- *     - copy the state of `frames[instr]` into a local frame `current`
- *     - call `current.execute(instr, interpreter)`, mutating the `current` frame
- *     - if it's a branching instruction
- *       - for all potential destination instructions
- *         - merge the destination instruction frame with the `current` frame
- *           (this enqueues the destination instr if its frame changed)
- *       - invoke `newControlFlowEdge` (see below)
- *   - the analyzer also tracks active exception handlers at each instruction
- *   - the empty method `newControlFlowEdge` can be overridden to track control flow if required
- *
+ * See the package object `analysis` for details on the ASM analysis framework.
  *
  * Some notes on nullness analysis.
  *
@@ -87,59 +33,37 @@ import BytecodeUtils._
  */
 
 /**
- * Type to represent nullness of values.
- */
-sealed trait Nullness {
-  final def merge(other: Nullness) = if (this == other) this else Unknown
-}
-case object NotNull extends Nullness
-case object Unknown extends Nullness
-case object Null    extends Nullness
-
-/**
  * Represents the nullness state for a local variable or stack value.
  *
- * Note that nullness of primitive values is not tracked, it will be always [[Unknown]].
+ * Note that nullness of primitive values is not tracked; it will always be unknown.
  */
-sealed trait NullnessValue extends Value {
-  /**
-   * The nullness of this value.
-   */
-  def nullness: Nullness
-
-  /**
-   * True if this value is a long or double. The Analyzer framework needs to know
-   * the size of each value when interpreting instructions, see `Frame.execute`.
-   */
-  def isSize2: Boolean
+sealed abstract class NullnessValue(final val isSize2: Boolean) extends Value {
   /**
    * The size of the slot described by this value. Cannot be 0 because no values are allocated
    * for void-typed slots, see NullnessInterpreter.newValue.
    **/
   def getSize: Int = if (isSize2) 2 else 1
 
-  def merge(other: NullnessValue) = NullnessValue(nullness merge other.nullness, isSize2)
+  def merge(other: NullnessValue) = {
+    if (this eq other) this
+    else if (this eq UnknownValue2) this // the only possible value of size two
+    else UnknownValue1
+  }
+
+  final override def equals(other: Any) = this eq other.asInstanceOf[Object]
 }
 
-object NullValue     extends NullnessValue { def nullness = Null;    def isSize2 = false; override def toString = "Null"     }
-object UnknownValue1 extends NullnessValue { def nullness = Unknown; def isSize2 = false; override def toString = "Unknown1" }
-object UnknownValue2 extends NullnessValue { def nullness = Unknown; def isSize2 = true;  override def toString = "Unknown2" }
-object NotNullValue  extends NullnessValue { def nullness = NotNull; def isSize2 = false; override def toString = "NotNull"  }
+object NullValue     extends NullnessValue(isSize2 = false) { override def toString = "Null"     }
+object UnknownValue1 extends NullnessValue(isSize2 = false) { override def toString = "Unknown1" }
+object UnknownValue2 extends NullnessValue(isSize2 = true ) { override def toString = "Unknown2" }
+object NotNullValue  extends NullnessValue(isSize2 = false) { override def toString = "NotNull"  }
 
 object NullnessValue {
-  def apply(nullness: Nullness, isSize2: Boolean): NullnessValue = {
-    if      (nullness == Null)    NullValue
-    else if (nullness == NotNull) NotNullValue
-    else if (isSize2)             UnknownValue2
-    else                          UnknownValue1
-  }
-
-  def apply(nullness: Nullness, insn: AbstractInsnNode): NullnessValue = {
-    apply(nullness, isSize2 = BytecodeUtils.instructionResultSize(insn) == 2)
-  }
+  def unknown(isSize2: Boolean) = if (isSize2) UnknownValue2 else UnknownValue1
+  def unknown(insn: AbstractInsnNode) = if (BytecodeUtils.instructionResultSize(insn) == 2) UnknownValue2 else UnknownValue1
 }
 
-final class NullnessInterpreter extends Interpreter[NullnessValue](Opcodes.ASM5) {
+final class NullnessInterpreter(bTypes: BTypes, method: MethodNode) extends Interpreter[NullnessValue](Opcodes.ASM5) {
   def newValue(tp: Type): NullnessValue = {
     // ASM loves giving semantics to null. The behavior here is the same as in SourceInterpreter,
     // which is provided by the framework.
@@ -151,29 +75,31 @@ final class NullnessInterpreter extends Interpreter[NullnessValue](Opcodes.ASM5)
     // (2) `tp` may also be `null`. When creating the initial frame, the analyzer invokes
     //     `newValue(null)` for each local variable. We have to return a value of size 1.
     if (tp == Type.VOID_TYPE) null // (1)
-    else NullnessValue(Unknown, isSize2 = tp != null /*(2)*/ && tp.getSize == 2 )
+    else NullnessValue.unknown(isSize2 = tp != null /*(2)*/ && tp.getSize == 2 )
   }
 
   override def newParameterValue(isInstanceMethod: Boolean, local: Int, tp: Type): NullnessValue = {
     // For instance methods, the `this` parameter is known to be not null.
-    if (isInstanceMethod && local == 0) NullnessValue(NotNull, isSize2 = false)
+    val isThis = local == 0 && (isInstanceMethod || {
+      method.parameters != null && !method.parameters.isEmpty && {
+        val p = method.parameters.get(0)
+        (p.access & Opcodes.ACC_SYNTHETIC) != 0 && p.name == "$this"
+      }
+    })
+    if (isThis) NotNullValue
     else super.newParameterValue(isInstanceMethod, local, tp)
   }
 
-  def newOperation(insn: AbstractInsnNode): NullnessValue = {
-    val nullness = (insn.getOpcode: @switch) match {
-      case Opcodes.ACONST_NULL => Null
+  def newOperation(insn: AbstractInsnNode): NullnessValue = (insn.getOpcode: @switch) match {
+    case Opcodes.ACONST_NULL => NullValue
 
-      case Opcodes.LDC => insn.asInstanceOf[LdcInsnNode].cst match {
-        case _: String | _: Type => NotNull
-        case _ => Unknown
-      }
-
-      case _ => Unknown
+    case Opcodes.LDC => insn.asInstanceOf[LdcInsnNode].cst match {
+      case _: String | _: Type => NotNullValue
+      case _ => NullnessValue.unknown(insn)
     }
 
     // for Opcodes.NEW, we use Unknown. The value will become NotNull after the constructor call.
-    NullnessValue(nullness, insn)
+    case _ => NullnessValue.unknown(insn)
   }
 
   def copyOperation(insn: AbstractInsnNode, value: NullnessValue): NullnessValue = value
@@ -182,26 +108,24 @@ final class NullnessInterpreter extends Interpreter[NullnessValue](Opcodes.ASM5)
     case Opcodes.CHECKCAST => value
 
     case Opcodes.NEWARRAY |
-         Opcodes.ANEWARRAY => NullnessValue(NotNull, isSize2 = false)
+         Opcodes.ANEWARRAY => NotNullValue
 
-    case _ => NullnessValue(Unknown, insn)
+    case _ => NullnessValue.unknown(insn)
   }
 
   def binaryOperation(insn: AbstractInsnNode, value1: NullnessValue, value2: NullnessValue): NullnessValue = {
-    NullnessValue(Unknown, insn)
+    NullnessValue.unknown(insn)
   }
 
-  def ternaryOperation(insn: AbstractInsnNode, value1: NullnessValue, value2: NullnessValue, value3: NullnessValue): NullnessValue = {
-    NullnessValue(Unknown, isSize2 = false)
-  }
+  def ternaryOperation(insn: AbstractInsnNode, value1: NullnessValue, value2: NullnessValue, value3: NullnessValue): NullnessValue = UnknownValue1
 
-  def naryOperation(insn: AbstractInsnNode, values: util.List[_ <: NullnessValue]): NullnessValue = (insn.getOpcode: @switch) match {
-    case Opcodes.MULTIANEWARRAY =>
-      NullnessValue(NotNull, isSize2 = false)
+  def naryOperation(insn: AbstractInsnNode, values: util.List[_ <: NullnessValue]): NullnessValue = insn match {
+    case mi: MethodInsnNode if bTypes.backendUtils.isNonNullMethodInvocation(mi) =>
+      NotNullValue
 
     case _ =>
-      // TODO: use a list of methods that are known to return non-null values
-      NullnessValue(Unknown, insn)
+      if (insn.getOpcode == Opcodes.MULTIANEWARRAY) NotNullValue
+      else NullnessValue.unknown(insn)
   }
 
   def returnOperation(insn: AbstractInsnNode, value: NullnessValue, expected: NullnessValue): Unit = ()
@@ -219,8 +143,10 @@ class NullnessFrame(nLocals: Int, nStack: Int) extends AliasingFrame[NullnessVal
   override def execute(insn: AbstractInsnNode, interpreter: Interpreter[NullnessValue]): Unit = {
     import Opcodes._
 
-    // get the object id of the object that is known to be not-null after this operation
-    val nullCheckedAliasId: Long = (insn.getOpcode: @switch) match {
+    // get the alias set of the object that is known to be not-null after this operation.
+    // alias sets are mutable / mutated, so after super.execute, this set contains the remaining
+    // aliases of the value that becomes not-null.
+    val nullCheckedAliases: AliasSet = (insn.getOpcode: @switch) match {
       case IALOAD |
            LALOAD |
            FALOAD |
@@ -229,7 +155,7 @@ class NullnessFrame(nLocals: Int, nStack: Int) extends AliasingFrame[NullnessVal
            BALOAD |
            CALOAD |
            SALOAD =>
-        aliasId(this.stackTop - 1)
+        aliasesOf(this.stackTop - 1)
 
       case IASTORE |
            FASTORE |
@@ -239,35 +165,36 @@ class NullnessFrame(nLocals: Int, nStack: Int) extends AliasingFrame[NullnessVal
            SASTORE |
            LASTORE |
            DASTORE =>
-        aliasId(this.stackTop - 2)
+        aliasesOf(this.stackTop - 2)
 
       case GETFIELD =>
-        aliasId(this.stackTop)
+        aliasesOf(this.stackTop)
 
       case PUTFIELD =>
-        aliasId(this.stackTop - 1)
+        aliasesOf(this.stackTop - 1)
 
       case INVOKEVIRTUAL |
            INVOKESPECIAL |
            INVOKEINTERFACE =>
         val desc = insn.asInstanceOf[MethodInsnNode].desc
         val numArgs = Type.getArgumentTypes(desc).length
-        aliasId(this.stackTop - numArgs)
+        aliasesOf(this.stackTop - numArgs)
 
       case ARRAYLENGTH |
            MONITORENTER |
            MONITOREXIT =>
-        aliasId(this.stackTop)
+        aliasesOf(this.stackTop)
 
       case _ =>
-        -1
+        null
     }
 
     super.execute(insn, interpreter)
 
-    if (nullCheckedAliasId != -1) {
-      for (i <- valuesWithAliasId(nullCheckedAliasId))
-        this.setValue(i, NotNullValue)
+    if (nullCheckedAliases != null) {
+      val it = nullCheckedAliases.iterator
+      while (it.hasNext)
+        this.setValue(it.next(), NotNullValue)
     }
   }
 }
@@ -276,7 +203,7 @@ class NullnessFrame(nLocals: Int, nStack: Int) extends AliasingFrame[NullnessVal
  * This class is required to override the `newFrame` methods, which makes makes sure the analyzer
  * uses NullnessFrames.
  */
-class NullnessAnalyzer extends Analyzer[NullnessValue](new NullnessInterpreter) {
+class NullnessAnalyzer(bTypes: BTypes, method: MethodNode) extends Analyzer[NullnessValue](new NullnessInterpreter(bTypes, method)) {
   override def newFrame(nLocals: Int, nStack: Int): NullnessFrame = new NullnessFrame(nLocals, nStack)
   override def newFrame(src: Frame[_ <: NullnessValue]): NullnessFrame = new NullnessFrame(src)
 }
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzer.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzerImpl.scala
index 594fd8923c..8af4bd4d5d 100644
--- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzer.scala
+++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzerImpl.scala
@@ -15,11 +15,10 @@ import scala.tools.asm.{Type, MethodVisitor}
 import scala.tools.asm.Opcodes._
 import scala.tools.asm.tree._
 import scala.tools.asm.tree.analysis._
-import scala.tools.nsc.backend.jvm.BTypes.InternalName
 
 import opt.BytecodeUtils._
 
-import scala.collection.convert.decorateAsScala._
+import scala.collection.JavaConverters._
 
 /**
  * This class provides additional queries over ASM's built-in `SourceValue` analysis.
@@ -55,24 +54,16 @@ import scala.collection.convert.decorateAsScala._
  *
  * If ever needed, we could introduce a mode where primitive conversions (l2i) are considered as
  * copying operations.
+ *
+ * Note on performance: the data flow analysis (SourceValue / SourceInterpreter, provided by ASM)
+ * is roughly 2-3x slower than a simple analysis (like BasicValue). The reason is that the merge
+ * function (merging producer sets) is more complex than merging simple basic values.
+ * See also the doc comment in the package object `analysis`.
  */
-class ProdConsAnalyzer(methodNode: MethodNode, classInternalName: InternalName) {
-
-  /* Timers for benchmarking ProdCons
-  import scala.reflect.internal.util.Statistics._
-  import ProdConsAnalyzer._
-  val analyzerTimer = newSubTimer(classInternalName + "#" + methodNode.name + " - analysis", prodConsAnalyzerTimer)
-  val consumersTimer = newSubTimer(classInternalName + "#" + methodNode.name + " - consumers", prodConsAnalyzerTimer)
-  */
-
-  val analyzer = new Analyzer(new InitialProducerSourceInterpreter)
+trait ProdConsAnalyzerImpl {
+  val methodNode: MethodNode
 
-//  val start = analyzerTimer.start()
-  analyzer.analyze(classInternalName, methodNode)
-//  analyzerTimer.stop(start)
-//  println(analyzerTimer.line)
-
-  def frameAt(insn: AbstractInsnNode) = analyzer.frameAt(insn, methodNode)
+  def frameAt(insn: AbstractInsnNode): Frame[SourceValue]
 
   /**
    * Returns the potential producer instructions of a (local or stack) value in the frame of `insn`.
@@ -102,8 +93,13 @@ class ProdConsAnalyzer(methodNode: MethodNode, classInternalName: InternalName)
     inputValues(insn).iterator.flatMap(v => v.insns.asScala).toSet
   }
 
-  def consumersOfOutputsFrom(insn: AbstractInsnNode): Set[AbstractInsnNode] =
-    _consumersOfOutputsFrom.get(insn).map(v => v.indices.flatMap(v.apply)(collection.breakOut): Set[AbstractInsnNode]).getOrElse(Set.empty)
+  def consumersOfOutputsFrom(insn: AbstractInsnNode): Set[AbstractInsnNode] = insn match {
+    case _: UninitializedLocalProducer                 => Set.empty
+    case ParameterProducer(local)                      => consumersOfValueAt(methodNode.instructions.getFirst, local)
+    case ExceptionProducer(handlerLabel, handlerFrame) => consumersOfValueAt(handlerLabel, handlerFrame.stackTop)
+    case _ =>
+      _consumersOfOutputsFrom.get(insn).map(v => v.indices.flatMap(v.apply)(collection.breakOut): Set[AbstractInsnNode]).getOrElse(Set.empty)
+  }
 
   /**
    * Returns the potential initial producer instructions of a value in the frame of `insn`.
@@ -159,13 +155,19 @@ class ProdConsAnalyzer(methodNode: MethodNode, classInternalName: InternalName)
     inputValueSlots(insn).flatMap(slot => initialProducersForValueAt(insn, slot)).toSet
   }
 
-  def ultimateConsumersOfOutputsFrom(insn: AbstractInsnNode): Set[AbstractInsnNode] = {
-    lazy val next = insn.getNext
-    outputValueSlots(insn).flatMap(slot => ultimateConsumersOfValueAt(next, slot)).toSet
+  def ultimateConsumersOfOutputsFrom(insn: AbstractInsnNode): Set[AbstractInsnNode] = insn match {
+    case _: UninitializedLocalProducer => Set.empty
+    case _ =>
+      lazy val next = insn match {
+        case _: ParameterProducer               => methodNode.instructions.getFirst
+        case ExceptionProducer(handlerLabel, _) => handlerLabel
+        case _                                  => insn.getNext
+      }
+      outputValueSlots(insn).flatMap(slot => ultimateConsumersOfValueAt(next, slot)).toSet
   }
 
   private def isCopyOperation(insn: AbstractInsnNode): Boolean = {
-    isVarInstruction(insn) || {
+    isLoadOrStore(insn) || {
       (insn.getOpcode: @switch) match {
         case DUP | DUP_X1 | DUP_X2 | DUP2 | DUP2_X1 | DUP2_X2 | SWAP | CHECKCAST => true
         case _ => false
@@ -376,9 +378,9 @@ class ProdConsAnalyzer(methodNode: MethodNode, classInternalName: InternalName)
       Seq(insn.asInstanceOf[IincInsnNode].`var`)
     } else {
       val frame = frameAt(insn)
-      val stackEffect = InstructionStackEffect(insn, frame)
+      val prodCons = InstructionStackEffect.forAsmAnalysis(insn, frame)
       val stackSize = frame.getLocals + frame.getStackSize
-      (stackSize - stackEffect._1) until stackSize
+      (stackSize - InstructionStackEffect.cons(prodCons)) until stackSize
     }
   }
 
@@ -386,7 +388,7 @@ class ProdConsAnalyzer(methodNode: MethodNode, classInternalName: InternalName)
   private def outputValueSlots(insn: AbstractInsnNode): Seq[Int] = insn match {
     case ParameterProducer(local)          => Seq(local)
     case UninitializedLocalProducer(local) => Seq(local)
-    case ExceptionProducer(frame)          => Seq(frame.stackTop)
+    case ExceptionProducer(_, frame)       => Seq(frame.stackTop)
     case _ =>
       if (insn.getOpcode == -1) return Seq.empty
       if (isStore(insn)) {
@@ -395,16 +397,15 @@ class ProdConsAnalyzer(methodNode: MethodNode, classInternalName: InternalName)
         Seq(insn.asInstanceOf[IincInsnNode].`var`)
       } else {
         val frame = frameAt(insn)
-        val stackEffect = InstructionStackEffect(insn, frame)
+        val prodCons = InstructionStackEffect.forAsmAnalysis(insn, frame)
         val nextFrame = frameAt(insn.getNext)
         val stackSize = nextFrame.getLocals + nextFrame.getStackSize
-        (stackSize - stackEffect._2) until stackSize
+        (stackSize - InstructionStackEffect.prod(prodCons)) until stackSize
       }
   }
 
   /** For each instruction, a set of potential consumers of the produced values. */
   private lazy val _consumersOfOutputsFrom: Map[AbstractInsnNode, Vector[Set[AbstractInsnNode]]] = {
-//    val start = consumersTimer.start()
     var res = Map.empty[AbstractInsnNode, Vector[Set[AbstractInsnNode]]]
     for {
       insn <- methodNode.instructions.iterator.asScala
@@ -417,8 +418,6 @@ class ProdConsAnalyzer(methodNode: MethodNode, classInternalName: InternalName)
       val outputIndex = producedSlots.indexOf(i)
       res = res.updated(producer, currentConsumers.updated(outputIndex, currentConsumers(outputIndex) + insn))
     }
-//    consumersTimer.stop(start)
-//    println(consumersTimer.line)
     res
   }
 
@@ -426,11 +425,6 @@ class ProdConsAnalyzer(methodNode: MethodNode, classInternalName: InternalName)
   private val _ultimateConsumersCache: mutable.AnyRefMap[(AbstractInsnNode, Int), Set[AbstractInsnNode]] = mutable.AnyRefMap.empty
 }
 
-object ProdConsAnalyzer {
-  import scala.reflect.internal.util.Statistics._
-  val prodConsAnalyzerTimer = newTimer("Time in ProdConsAnalyzer", "jvm")
-}
-
 /**
  * A class for pseudo-instructions representing the initial producers of local values that have
  * no producer instruction in the method:
@@ -446,10 +440,10 @@ object ProdConsAnalyzer {
  *     return a;
  *   }
  *
- * In the first frame of the method, the SoruceValue for parameter `a` gives an empty set of
+ * In the first frame of the method, the SourceValue for parameter `a` gives an empty set of
  * producer instructions.
  *
- * In the frame of the `IRETURN` instruction, the SoruceValue for parameter `a` lists a single
+ * In the frame of the `IRETURN` instruction, the SourceValue for parameter `a` lists a single
  * producer instruction: the `ISTORE 1`. This makes it look as if there was a single producer for
  * `a`, where in fact it might still hold the parameter's initial value.
  */
@@ -459,9 +453,9 @@ abstract class InitialProducer extends AbstractInsnNode(-1) {
   override def accept(cv: MethodVisitor): Unit = throw new UnsupportedOperationException
 }
 
-case class ParameterProducer(local: Int)                      extends InitialProducer
-case class UninitializedLocalProducer(local: Int)             extends InitialProducer
-case class ExceptionProducer(handlerFrame: Frame[_ <: Value]) extends InitialProducer
+case class ParameterProducer(local: Int)                                                  extends InitialProducer
+case class UninitializedLocalProducer(local: Int)                                         extends InitialProducer
+case class ExceptionProducer[V <: Value](handlerLabel: LabelNode, handlerFrame: Frame[V]) extends InitialProducer
 
 class InitialProducerSourceInterpreter extends SourceInterpreter {
   override def newParameterValue(isInstanceMethod: Boolean, local: Int, tp: Type): SourceValue = {
@@ -473,6 +467,6 @@ class InitialProducerSourceInterpreter extends SourceInterpreter {
   }
 
   override def newExceptionValue(tryCatchBlockNode: TryCatchBlockNode, handlerFrame: Frame[_ <: Value], exceptionType: Type): SourceValue = {
-    new SourceValue(1, ExceptionProducer(handlerFrame))
+    new SourceValue(1, ExceptionProducer(tryCatchBlockNode.handler, handlerFrame))
   }
 }
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/TypeFlowInterpreter.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/TypeFlowInterpreter.scala
new file mode 100644
index 0000000000..bcf9978c16
--- /dev/null
+++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/TypeFlowInterpreter.scala
@@ -0,0 +1,36 @@
+package scala.tools.nsc
+package backend.jvm
+package analysis
+
+import scala.tools.asm.Type
+import scala.tools.asm.tree.analysis.{BasicValue, BasicInterpreter}
+
+abstract class TypeFlowInterpreter extends BasicInterpreter {
+  override def newValue(tp: Type) = {
+    if (tp == null) super.newValue(tp)
+    else if (isRef(tp)) new BasicValue(tp)
+    else super.newValue(tp)
+  }
+
+  def isRef(tp: Type) = tp != null && (tp.getSort match {
+    case Type.OBJECT | Type.ARRAY => true
+    case _ => false
+  })
+
+  def refLub(a: BasicValue, b: BasicValue): BasicValue
+
+  override def merge(a: BasicValue, b: BasicValue): BasicValue = {
+    if (a == b) a
+    else if (isRef(a.getType) && isRef(b.getType)) refLub(a, b)
+    else BasicValue.UNINITIALIZED_VALUE
+  }
+}
+
+/**
+ * A [[TypeFlowInterpreter]] which collapses LUBs of non-equal reference types to Object.
+ * This could be made more precise by looking up ClassBTypes for the two reference types and using
+ * the `jvmWiseLUB` method.
+ */
+class NonLubbingTypeFlowInterpreter extends TypeFlowInterpreter {
+  def refLub(a: BasicValue, b: BasicValue): BasicValue = BasicValue.REFERENCE_VALUE // java/lang/Object
+}
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/package.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/package.scala
new file mode 100644
index 0000000000..999c686aac
--- /dev/null
+++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/package.scala
@@ -0,0 +1,374 @@
+package scala.tools.nsc.backend.jvm
+
+/**
+ * Summary on the ASM analyzer framework
+ * --------------------------------------
+ *
+ * Value
+ *  - Abstract, needs to be implemented for each analysis.
+ *  - Represents the desired information about local variables and stack values, for example:
+ *    - Is this value known to be null / not null?
+ *    - What are the instructions that could potentially have produced this value?
+ *
+ * Interpreter
+ *  - Abstract, needs to be implemented for each analysis. Sometimes one can subclass an existing
+ *    interpreter, e.g., SourceInterpreter or BasicInterpreter.
+ *  - Multiple abstract methods that receive an instruction and the instruction's input values, and
+ *    return a value representing the result of that instruction.
+ *    - Note: due to control flow, the interpreter can be invoked multiple times for the same
+ *      instruction, until reaching a fixed point.
+ *  - Abstract `merge` function that computes the least upper bound of two values. Used by
+ *    Frame.merge (see below).
+ *
+ * Frame
+ *  - Can be used directly for many analyses, no subclass required.
+ *  - Every frame has an array of values: one for each local variable and for each stack slot.
+ *    - A `top` index stores the index of the current stack top
+ *    - NOTE: for a size-2 local variable at index i, the local variable at i+1 is set to an empty
+ *      value. However, for a size-2 value at index i on the stack, the value at i+1 holds the next
+ *      stack value. IMPORTANT: this is only the case in ASM's analysis framework, not in bytecode.
+ *      See comment below.
+ *  - Defines the `execute(instruction)` method.
+ *    - executing mutates the state of the frame according to the effect of the instruction
+ *      - pop consumed values from the stack
+ *      - pass them to the interpreter together with the instruction
+ *      - if applicable, push the resulting value on the stack
+ *  - Defines the `merge(otherFrame)` method
+ *    - called by the analyzer when multiple control flow paths lead to an instruction
+ *      - the frame at the branching instruction is merged into the current frame of the
+ *        instruction (held by the analyzer)
+ *      - mutates the values of the current frame, merges all values using interpreter.merge.
+ *
+ * Analyzer
+ *   - Stores a frame for each instruction
+ *   - `merge` function takes an instruction and a frame, merges the existing frame for that instr
+ *     (from the frames array) with the new frame passed as argument.
+ *     If the frame changed, it puts the instruction on the work queue (fixpoint).
+ *   - initial frame: initialized for first instr by calling interpreter.new[...]Value
+ *     for each slot (locals and params), stored in frames[firstInstr] by calling `merge`
+ *   - work queue of instructions (`queue` array, `top` index for next instruction to analyze)
+ *   - analyze(method): simulate control flow. while work queue non-empty:
+ *     - copy the state of `frames[instr]` into a local frame `current`
+ *     - call `current.execute(instr, interpreter)`, mutating the `current` frame
+ *     - if it's a branching instruction
+ *       - for all potential destination instructions
+ *         - merge the destination instruction frame with the `current` frame
+ *           (this enqueues the destination instr if its frame changed)
+ *       - invoke `newControlFlowEdge` (see below)
+ *   - the analyzer also tracks active exception handlers at each instruction
+ *   - the empty method `newControlFlowEdge` can be overridden to track control flow if required
+ *
+ *
+ * MaxLocals and MaxStack
+ * ----------------------
+ *
+ * At the JVM level, long and double values occupy two slots, both as local variables and on the
+ * stack, as specified in the JVM spec 2.6.2:
+ *   "At any point in time, an operand stack has an associated depth, where a value of type long or
+ *    double contributes two units to the depth and a value of any other type contributes one unit."
+ *
+ * For example, a method
+ *   class A { def f(a: Long, b: Long) = a + b }
+ * has MAXSTACK=4 in the classfile. This value is computed by the ClassWriter / MethodWriter when
+ * generating the classfile (we always pass COMPUTE_MAXS to the ClassWriter).
+ *
+ * For running an ASM Analyzer, long and double values occupy two local variable slots, but only
+ * a single slot on the operand stack, as shown by the following snippet:
+ *
+ *   import scala.tools.nsc.backend.jvm._
+ *   import scala.tools.nsc.backend.jvm.opt.BytecodeUtils._
+ *   import scala.collection.convert.decorateAsScala._
+ *   import scala.tools.asm.tree.analysis._
+ *
+ *   val cn = AsmUtils.readClass("/Users/luc/scala/scala/sandbox/A.class")
+ *   val m = cn.methods.iterator.asScala.find(_.name == "f").head
+ *
+ *   // the value is read from the classfile, so it's 4
+ *   println(s"maxLocals: ${m.maxLocals}, maxStack: ${m.maxStack}") // maxLocals: 5, maxStack: 4
+ *
+ *   // we can safely set it to 2 for running the analyzer.
+ *   m.maxStack = 2
+ *
+ *   val a = new Analyzer(new BasicInterpreter)
+ *   a.analyze(cn.name, m)
+ *   val addInsn = m.instructions.iterator.asScala.find(_.getOpcode == 97).get // LADD Opcode
+ *   val addFrame = a.frameAt(addInsn, m)
+ *
+ *   addFrame.getStackSize // 2: the two long values only take one slot each
+ *   addFrame.getLocals    // 5: this takes one slot, the two long parameters take 2 slots each
+ *
+ *
+ * While running the optimizer, we need to make sure that the `maxStack` value of a method is
+ * large enough for running an ASM analyzer. We don't need to worry if the value is incorrect from
+ * the JVM perspective: the value will be re-computed and overwritten in the ClassWriter.
+ *
+ *
+ * Lessons learnt while benchmarking the alias tracking analysis
+ * -------------------------------------------------------------
+ *
+ * Profiling
+ *  - Use YourKit for finding hotspots (CPU profiling). When it comes to drilling down into the details
+ *    of a hotspot, don't pay too much attention to the percentages / time counts.
+ *  - Should also try other profilers.
+ *  - Use timers. When a method showed up as a hotspot, I added a timer around that method, and a
+ *    second one within the method to measure specific parts. The timers slow things down, but the
+ *    relative numbers show what parts of a method are slow.
+ *
+ * ASM analyzer insights
+ *  - The time for running an analysis depends on the number of locals and the number of instructions.
+ *    Reducing the number of locals helps speed up the analysis: there are fewer values to
+ *    merge when merging two frames.
+ *    See also https://github.com/scala/scala-dev/issues/47
+ *  - The common hot spot of an ASM analysis is Frame.merge, for example in producers / consumers.
+ *  - For nullness analysis the time is spent as follows
+ *    - 20% merging nullness values. This is as expected: for example, the same absolute amount of
+ *      time is spent in merging BasicValues when running a BasicInterpreter.
+ *    - 50% merging alias sets. I tried to optimize this as far as I could.
+ *    - 20% is spent creating new frames from existing ones, see comment on AliasingFrame.init.
+ *  - The implementation of Frame.merge (the main hot spot) contains a megamorphic callsite to
+ *    `interpreter.merge`. This can be observed easily by comparing a test program that runs only
+ *    a BasicValue analysis with a program that first runs a nullness analysis and then a
+ *    BasicValue analysis. In one example, the time for the BasicValue analysis goes from 519ms to
+ *    1963ms, a 3.8x slowdown.
+ *  - I added counters to the Frame.merge methods for nullness and BasicValue analysis. In the
+ *    examples I benchmarked, the number of merge invocations was always exactly the same.
+ *    It would probably be possible to come up with an example where alias set merging forces
+ *    additional analysis rounds until reaching the fixpoint, but I did not observe such cases.
+ *
+ * To benchmark an analysis, instead of benchmarking analysis while it runs in the compiler
+ * backend, one can easily run it from a separate program (or the repl). The bytecode to analyze
+ * can simply be parsed from a classfile. See the example code that follows this comment.
+ *
+ *
+ * Nullness Analysis in Miguel's Optimizer
+ * ---------------------------------------
+ *
+ * Miguel implemented alias tracking for nullness analysis differently [1]. Remember that every
+ * frame has an array of values. Miguel's idea was to represent aliasing using reference equality
+ * in the values array: if two entries in the array point to the same value object, the two entries
+ * are aliases in the frame of the given instruction.
+ *
+ * While this idea seems elegant at first sight, Miguel's implementation does not merge frames
+ * correctly when it comes to aliasing. Assume in frame 1, values (a, b, c) are aliases, while in
+ * frame 2 (a, b) are aliases. When merging the second into the first, we have to make sure that
+ * c is removed as an alias of (a, b).
+ *
+ * It would be possible to implement correct alias set merging in Miguel's approach. However, frame
+ * merging is the main hot spot of analysis. The computational complexity of implementing alias set
+ * merging by traversing the values array and comparing references is too high. The concrete
+ * alias set representation that is used in the current implementation (see class AliasingFrame)
+ * makes alias set merging more efficient.
+ *
+ * [1] https://github.com/scala-opt/scala/blob/opt/rebase/src/compiler/scala/tools/nsc/backend/bcode/NullnessPropagator.java
+ *
+ *
+ * Complexity and scaling of analysis
+ * ----------------------------------
+ *
+ * The time complexity of a data flow analysis depends on:
+ *
+ *   - The size of the method. The complexity factor is linear (assuming the number of locals and
+ *     branching instructions remains constant). The main analysis loop runs through all
+ *     instructions of a method once. Instructions are only re-enqueued if a control flow merge
+ *     changes the frame at some instruction.
+ *
+ *   - The branching instructions. When a second (third, ..) control flow edge arrives at an
+ *     instruction, the existing frame at the instruction is merged with the one computed on the
+ *     new branch. If the merge function changes the existing frame, the instruction is enqueued
+ *     for another analysis. This results in a merge operation for the successors of the
+ *     instruction.
+ *
+ *   - The number of local variables. The hot spot of analysis is frame merging. The merge function
+ *     iterates through the values in the frame (locals and stack values) and merges them.
+ *
+ * I measured the running time of an analysis for two examples:
+ *   - Keep the number of locals and branching instructions constant, increase the number of
+ *     instructions. The running time grows linearly with the method size.
+ *   - Increase the size and number of locals in a method. The method size and number of locals
+ *     grow at the same pace. Here, the running time increase is polynomial. It looks like the
+ *     complexity is #instructions * #locals^2 (see below).
+ *
+ * I measured nullness analysis (which tracks aliases) and a BasicValue analysis. Nullness runs
+ * roughly 5x slower (because of alias tracking) at every problem size - this factor doesn't change.
+ *
+ * The numbers below are for nullness. Note that the last column is roughly constant, i.e., the running
+ * time is proportional to #ins * #loc^2. Therefore we use this factor when limiting the maximal
+ * method size for running an analysis.
+ *
+ *   #insns    #locals    time (ms)       time / (#ins * #loc^2) * 10^6
+ *   1305      156        34              1.07
+ *   2610      311        165             0.65
+ *   3915      466        490             0.57
+ *   5220      621        1200            0.59
+ *   6525      776        2220            0.56
+ *   7830      931        3830            0.56
+ *   9135      1086       6570            0.60
+ *   10440     1241       9700            0.60
+ *   11745     1396       13800           0.60
+ *
+ * As a second experiment, nullness analysis was run with varying #insns but constant #locals.
+ * The last column shows linear complexity with respect to the method size (linearOffset = 2279):
+ *
+ *   #insns     #locals     time (ms)    (time + linearOffset) / #insns
+ *   5220       621         1090         0.645
+ *   6224       621         1690         0.637
+ *   7226       621         2280         0.630
+ *   8228       621         2870         0.625
+ *   9230       621         3530         0.629
+ *   10232      621         4130         0.626
+ *   11234      621         4770         0.627
+ *   12236      621         5520         0.637
+ *   13238      621         6170         0.638
+ *
+ *
+ * When running a BasicValue analysis, the complexity observation is the same (time is proportional
+ * to #ins * #loc^2).
+ *
+ *
+ * Measuring analysis execution time
+ * ---------------------------------
+ *
+ * See code below.
+ */
+
+/*
+object Test {
+  val overwrite: Option[String] = null
+
+  @noinline def serialize(o: AnyRef): String = null
+
+  @noinline def deserialize(string: String): AnyRef = null
+
+  @inline def checkRoundTrip[T <: AnyRef](instance: T)(f: T => AnyRef) {
+    val result = serialize(instance)
+    val reconstituted = deserialize(result).asInstanceOf[T]
+    assert(f(instance) == f(reconstituted), (f(instance), f(reconstituted)))
+  }
+
+  @inline def check[T <: AnyRef](instance: => T)(prevResult: String, f: T => AnyRef = (x: T) => x) {
+    // pattern match to introduce a lot of control flow, i.e., a lot of frame merges
+    overwrite match {
+      case Some(f) =>
+      case None =>
+        checkRoundTrip(instance)(f)
+        assert(f(deserialize(prevResult).asInstanceOf[T]) == f(instance), instance)
+        assert(prevResult == "res", instance)
+    }
+  }
+
+  // @inline def fun[T <: AnyRef](instance: => T) = (x: T) => x
+
+  def testMain(): Unit = {
+    // every call to check creates quite a number of locals, and also quite a number of aliases
+    // of the same value (x1). First of all, the default argument call is expanded as below. Then
+    // method check is inlined, and within the body of check, checkRoundTrip and assert have
+    // already been inlined as well.
+
+    // {
+    //   val x1 = () => ""
+    //   val x2 = fun(x1())  // the compiler optimizes this: instead of passing `() => x1()`, it just passes x1
+    //   check(x1())("", x2) // same here for x1
+    // }
+
+    check("")("")
+    check("")("")
+    check("")("")
+    check("")("")
+    check("")("") // 5
+    check("")("")
+    check("")("")
+    check("")("")
+    check("")("")
+    check("")("") // 10
+    check("")("")
+    check("")("")
+    check("")("")
+    check("")("")
+    check("")("") // 15
+    check("")("")
+    check("")("")
+    check("")("")
+    check("")("")
+    check("")("") // 20
+    check("")("")
+    check("")("")
+    check("")("")
+    check("")("")
+    check("")("") // 25
+    check("")("")
+    check("")("")
+    check("")("")
+    check("")("")
+    check("")("") // 30
+    check("")("")
+    check("")("")
+    check("")("")
+    check("")("")
+    check("")("") // 35
+    check("")("")
+    check("")("")
+    check("")("")
+    check("")("")
+    check("")("") // 40
+    // check("")("")
+    // check("")("")
+    // check("")("")
+    // check("")("")
+    // check("")("") // 45
+    // check("")("")
+    // check("")("")
+    // check("")("")
+    // check("")("")
+    // check("")("") // 50
+    // check("")("")
+    // check("")("")
+    // check("")("")
+    // check("")("")
+    // check("")("") // 55
+
+    // 1000 bytecode instructions, 0 locals
+    // println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10));
+  }
+
+  def timed[T](f: => T): T = {
+    val start = System.nanoTime()
+    val r = f
+    val nanos = System.nanoTime() - start
+    println(s"took ${nanos/1000000}ms")
+    r
+  }
+
+  def main(args: Array[String]): Unit = {
+    import scala.tools.nsc.backend.jvm._
+    val cn = AsmUtils.readClass("/Users/luc/scala/scala/sandbox/Test$.class")
+    import scala.collection.convert.decorateAsScala._
+    val m = cn.methods.iterator.asScala.find(_.name == "testMain").head
+
+    println(s"${m.instructions.size} instructions - ${m.maxLocals} locals")
+
+    val a = new analysis.NullnessAnalyzer
+    a.analyze(cn.name, m) // warm up
+
+    analysis.AliasingFrame.reset()
+    timed(a.analyze(cn.name, m))
+    analysis.AliasingFrame.timers foreach println
+
+    println("---")
+
+    // NOTE: if we don't run nullness analysis above (comment it out), then the BasicValue
+    // analysis runs 3.5x faster. Most likely because the call to Interpreter.merge inside
+    // Frame.merge is no longer megamorphic.
+
+    import scala.tools.asm.tree.analysis._
+    val ba = new Analyzer(new BasicInterpreter)
+    ba.analyze(cn.name, m) // warm up
+
+    timed(ba.analyze(cn.name, m))
+
+    println("---")
+
+    timed(a.analyze(cn.name, m))
+  }
+}
+*/
+package object analysis // declares no members; it exists as the home of the package documentation above