11 files changed, 3964 insertions, 1372 deletions
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/BoxUnbox.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/BoxUnbox.scala
new file mode 100644
index 0000000000..78fc7e1ecf
--- /dev/null
+++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/BoxUnbox.scala
@@ -0,0 +1,907 @@
+/* NSC -- new Scala compiler
+ * Copyright 2005-2014 LAMP/EPFL
+ * @author  Martin Odersky
+ */
+
+package scala.tools.nsc
+package backend.jvm
+package opt
+
+import scala.annotation.tailrec
+import scala.tools.asm.Type
+import scala.tools.asm.Opcodes._
+import scala.tools.asm.tree._
+import scala.collection.mutable
+import scala.collection.JavaConverters._
+import scala.tools.nsc.backend.jvm.BTypes.InternalName
+import scala.tools.nsc.backend.jvm.opt.BytecodeUtils._
+
+class BoxUnbox[BT <: BTypes](val btypes: BT) {
+  import btypes._
+  import backendUtils._
+
+  /**
+   * Eliminate box-unbox pairs within `method`. Such pairs commonly appear after closure elimination:
+   *
+   *   def t2 = {
+   *     val f = (b: Byte, i: Int) => i + b // no specialized variant for this function type
+   *     f(1, 2) // invokes the generic `apply`
+   *   }
+   *
+   * The closure optimizer re-writes the `apply` call to a call of the `anonfun$adapted` method,
+   * which takes boxed arguments. After inlining this method, we get
+   *
+   *   def t2 = {
+   *     val a = boxByte(1)
+   *     val b = boxInteger(2)
+   *     val r = boxInteger(anonfun$(unboxByte(a), unboxInt(b)))
+   *     unboxInt(r)
+   *   }
+   *
+   * All these box/unbox operations are eliminated here.
+   *
+   * Implementation: for every box operation, find all consumers of the boxed value, then all
+   * producers of these consumers, repeat until reaching a fixpoint. If this results in a set of
+   * boxing and unboxing operations, the box can be eliminated.
+   *
+   * There are two methods for eliminating boxes:
+   *   M1: If there is a single boxing operation, the boxed value(s) are stored into new local
+   *       variable(s) at the allocation site. Accesses to the boxed value are re-written to reads /
+   *       writes of these locals. Advantages:
+   *         - supports mutable boxes (IntRef and friends)
+   *         - supports eliminating unbox operations even if the box object needs to be created
+   *           because it escapes (see E4)
+   *           - works by keeping the unboxed value(s) in locals AND the box in its original form
+   *           - only for immutable boxes: modifications to the escaped box cannot be applied to
+   *             the local variable(s) holding the boxed value(s).
+   *       Restriction:
+   *         - does not work if there are multiple boxing operations (see E1)
+   *
+   *   M2: If there are multiple boxing operations, the boxing operations are simply eliminated,
+   *       leaving the unboxed value(s) on the stack. Store / load operations that previously
+   *       acted on the box are adapted to handle the boxed type(s). If the box contains multiple
+   *       values (or a size-2 value, which doesn't fit into locals that were used for the box),
+   *       new local slots are used for store / load operations. Restrictions:
+   *         - does not support re-writing writes to (mutable) boxes (see E2)
+   *         - does not support re-writing reads of boxes that also escape (see E3)
+   *
+   *
+   * E1: M1 only works if there's a single boxing operation.
+   *   def e1(b: Boolean) = {
+   *     val i: Integer = box(10) // 10 is stored into a new local, box operation and i removed
+   *     val j: Integer = box(20) // 20 is stored into a new local, box operation and j removed
+   *     val r = if (b) i else j  // loads and stores of the box are eliminated, r no longer exists
+   *     unbox(r)                 // cannot rewrite: we don't know which local to load
+   *   }
+   * Note: the example has no write and the box does not escape, so M2 works here.
+   *
+   * E2: mutable boxes with multiple boxing operations cannot be eliminated.
+   *   M1: see E1
+   *   M2: cannot replace an `IntRef` on the stack by an `Int` value on the stack, an Int on the
+   *       stack cannot be modified.
+   *
+   *   def e2(b: Boolean) = {
+   *     val r1 = new IntRef(0)
+   *     val r2 = new IntRef(1)
+   *     val modRef = if (b) r1 else r2
+   *     modRef.elem += 10               // M1: cannot rewrite: which local to write? same as E1.
+   *     (if (b) r1 else r2).elem += 10  // M2: cannot change an Int on the stack
+   *     (r1.elem, r2.elem)
+   *   }
+   *
+   *
+   * E3: escaping boxes with multiple boxing operations cannot be rewritten.
+   *   M1: see E1.
+   *   M2: at *, instead of an Integer, an Int is on the stack, but the escape method expects an
+   *       Integer. We cannot just create a box at this point: if there are multiple escapes (or
+   *       an escape is executed more than once), the difference could be observed (reference
+   *       equality).
+   *
+   *   def e3(b: Boolean) = {
+   *     val i: Integer = box(1)
+   *     val j: Integer = box(2)
+   *     escape(if (b) i else j)  // *
+   *     unbox(if (b) i else j)
+   *  }
+   *
+   *
+   * E4: M1 supports rewriting unbox operations of immutable boxes that escape
+   *   def e4 = {
+   *     val i: Integer = box(10) // 10 is stored into a new local, loaded as argument for the box call
+   *     escape(i)                // not changed, still loads the local i holding the box
+   *     unbox(i)                 // rewritten to a pop (of the box) and a load of the local variable
+   *   }
+   *
+   *
+   * E4 seems to be a bit of a corner case, but it's necessary to unblock box eliminations with
+   * mutual dependencies. Example:
+   *
+   *   val ((a, b), c) = ((1, 2), 3)
+   *   a + b + c
+   *
+   * generates (after a few cleanups) the following (pseudo-bytecode, ignoring primitive boxing, specialization):
+   *
+   *   load 1, load 2, new Tuple2  // stack: Tuple2
+   *   load 3                      // stack: Tuple2; Int
+   *   val local1 = new Tuple2
+   *   val local2 = local1._1.asInstanceOf[Tuple2]
+   *   val c = local1._2.asInstanceOf[Int]
+   *   if (local2 == null) throw new MatchError(local1)
+   *   val a = local2._1
+   *   val b = local2._2
+   *   a + b + c
+   *
+   * In order to eliminate the tuples, we first need to eliminate the outer tuple (stored in local1)
+   *   - single box operation, so we use M1
+   *   - there are three consumers of the outer tuple: `local1._1`, `local1._2` and
+   *     `new MatchError(local1)`. in the last one, the tuple escapes.
+   *   - note that the MatchError creation is dead code: local2 is never null. However, our nullness
+   *     analysis cannot identify this: it does not track nullness through tuple stores and loads.
+   *   - if we re-write the non-escaping consumers of the outer tuple, but keep the tuple allocation
+   *     and the escaping consumer, we get the following:
+   *
+   *   load 1, load 2
+   *   val newLocal1 = new Tuple2; load newLocal1  // stack: Tuple2
+   *   val newLocal2 = 3; load newLocal2           // stack: Tuple2; Int
+   *   val local1 = new Tuple2
+   *   val local2 = newLocal1
+   *   val c = newLocal2
+   *   if (local2 == null) throw new MatchError(local1)
+   *   val a = local2._1
+   *   val b = local2._2
+   *   a + b + c
+   *
+   * At this point, the nullness analysis sees that `local2 == null` is false, dead code elimination
+   * removes the `throw new MatchError(local1)`. After eliminating the allocation of the outer tuple,
+   * the inner tuple (stored in newLocal1) can also be eliminated.
+   *
+   *
+   * Special case for tuples wrt specialization: a tuple getter may box or unbox the value stored
+   * in the tuple: calling `_1` on a `Tuple2$mcII$sp` boxes the primitive Int stored in the tuple.
+   * Similarly, calling `_1$mcI$sp` on a non-specialized `Tuple2` unboxes the Integer in the tuple.
+   * When eliminating such getters, we have to introduce appropriate box / unbox calls.
+   *
+   *
+   * TODO: add new calls (box / unbox) to the call graph (not urgent)
+   * TODO: update the call graph because stack heights change (not urgent).
+   *   this may also affect other optimizations, we ignored the issue so far. check how stack
+   *   heights stored in the call graph are used.
+   * Note: these tasks are not urgent because the call graph is not currently used during / after
+   * method-local optimizations, only before to perform inlining and closure rewriting.
+   */
+  def boxUnboxElimination(method: MethodNode, owner: InternalName): Boolean = {
+    AsmAnalyzer.sizeOKForSourceValue(method) && {
+      val toInsertBefore = mutable.Map.empty[AbstractInsnNode, List[AbstractInsnNode]]
+      val toReplace = mutable.Map.empty[AbstractInsnNode, List[AbstractInsnNode]]
+      val toDelete = mutable.Set.empty[AbstractInsnNode]
+
+      val knownHandled = mutable.Set.empty[AbstractInsnNode]
+
+      lazy val prodCons = new ProdConsAnalyzer(method, owner)
+
+      var nextLocal = method.maxLocals
+      def getLocal(size: Int) = {
+        val r = nextLocal
+        nextLocal += size
+        r
+      }
+
+      var maxStackGrowth = 0
+
+      /** Method M1 for eliminating box-unbox pairs (see doc comment in the beginning of this file).
+       *  The value(s) boxed by the single `creation` are stored in fresh locals, and the consumers in
+       *  `finalCons` are rewritten to access these locals. If `keepBox` is set, the box stays as well. */
+      def replaceBoxOperationsSingleCreation(creation: BoxCreation, finalCons: Set[BoxConsumer], boxKind: BoxKind, keepBox: Boolean): Unit = {
+        /**
+         * If the box is eliminated, all copy operations (loads, stores, others) of the box need to
+         * be removed. This method returns all copy operations that should be removed.
+         *
+         * Returns `None` in case some exotic copy operation is found that cannot be removed
+         * (DUP2_X1 and friends - these are never emitted by scalac). In this case, the box cannot
+         * be eliminated.
+         */
+        def copyOpsToEliminate: Option[Set[AbstractInsnNode]] = {
+          var elidableCopyOps = Set.empty[AbstractInsnNode]
+          var replaceOK = true
+          val copyOps = new CopyOpsIterator(Set(creation), finalCons, prodCons)
+          while (replaceOK && copyOps.hasNext) copyOps.next() match {
+            case vi: VarInsnNode =>
+              elidableCopyOps += vi
+            case copyOp if copyOp.getOpcode == DUP =>
+              elidableCopyOps += copyOp
+            case _ =>
+              replaceOK = false
+          }
+          if (replaceOK) Some(elidableCopyOps) else None
+        }
+
+        // If the box is kept, its copy operations stay in place and are not even inspected.
+        val canRewrite = keepBox || (copyOpsToEliminate match {
+          case Some(copyOps) =>
+            toDelete ++= copyOps
+            true
+          case _ => false
+        })
+
+        if (canRewrite) {
+          val localSlots: Vector[(Int, Type)] = boxKind.boxedTypes.map(tp => (getLocal(tp.getSize), tp))(collection.breakOut)
+
+          // store boxed value(s) into localSlots, in reverse order (the last value is stored first)
+          val storeOps = localSlots.toList reverseMap { case (slot, tp) =>
+            new VarInsnNode(tp.getOpcode(ISTORE), slot)
+          }
+          val storeInitialValues = creation.loadInitialValues match {
+            case Some(ops) => ops ::: storeOps
+            case None => storeOps
+          }
+          if (keepBox) {
+            val loadOps: List[VarInsnNode] = localSlots.map({ case (slot, tp) =>
+              new VarInsnNode(tp.getOpcode(ILOAD), slot)
+            })(collection.breakOut)
+            toInsertBefore(creation.valuesConsumer) = storeInitialValues ::: loadOps
+          } else {
+            toReplace(creation.valuesConsumer) = storeInitialValues
+            toDelete ++= creation.allInsns - creation.valuesConsumer
+          }
+
+          // rewrite consumers
+          finalCons foreach {
+            case write: StaticSetterOrInstanceWrite =>
+              assert(!keepBox, s"cannot eliminate box write if the box remains (and escapes): $write")
+              val (slot, tp) = localSlots(boxKind.extractedValueIndex(write))
+              val storeOp = new VarInsnNode(tp.getOpcode(ISTORE), slot)
+              toReplace(write.consumer) = List(storeOp)
+            // an escaping consumer keeps using the box itself, there is nothing to rewrite
+            case c: EscapingConsumer =>
+              assert(keepBox, s"found escaping consumer, but box is eliminated: $c")
+            // any other consumer extracts a value: load it from its local (after popping a kept box)
+            case extraction =>
+              val (slot, tp) = localSlots(boxKind.extractedValueIndex(extraction))
+              val loadOps = new VarInsnNode(tp.getOpcode(ILOAD), slot) :: extraction.postExtractionAdaptationOps(tp)
+              if (keepBox) toReplace(extraction.consumer) = getPop(1) :: loadOps
+              else toReplace(extraction.consumer) = loadOps
+              toDelete ++= extraction.allInsns - extraction.consumer
+          }
+        }
+      }
+
+      /** Method M2 for eliminating box-unbox pairs (see doc comment in the beginning of this file):
+       *  all box creations and extractions are removed, leaving the unboxed value(s) on the stack,
+       *  and the copy operations in between are adapted. Nothing is changed if that is impossible. */
+      def replaceBoxOperationsMultipleCreations(allCreations: Set[BoxCreation], allConsumers: Set[BoxConsumer], boxKind: BoxKind): Unit = {
+        /**
+         * If a single-value size-1 box is eliminated, local variables slots holding the box are
+         * reused to hold the unboxed value. In case there's an entry for that local variable in the
+         * method's local variables table (debug info), adapt the type.
+         *
+         * If there are multiple entries for a local variable that's changing types, then all
+         * entries for that variable are deleted - it's not obvious how to find the correct entry.
+         * Note that scalac never re-uses local variable slots for non-overlapping locals. Also note
+         * that all locals that are newly created during the optimizer don't have an entry either.
+         *
+         * Finally, note that variables that become unused are removed later from the table by
+         * removeUnusedLocalVariableNodes in LocalOpt.
+         *
+         * Unlike modifications that affect the method's instructions (which uses toReplace etc),
+         * we can directly modify the local variable table - it does not affect the frames of the
+         * ProdCons analysis.
+         */
+        def updateLocalVariableTypes(reTypedLocals: Map[Int, Type]): Unit = {
+          lazy val localsByIndex = method.localVariables.asScala.groupBy(_.index)
+          for ((index, tp) <- reTypedLocals) localsByIndex.get(index).map(_.toList) match {
+            case Some(List(local)) =>
+              local.desc = tp.getDescriptor
+            case Some(locals) =>
+              locals foreach method.localVariables.remove
+            case _ =>
+          }
+        }
+
+        /** Remove box creations - leave the boxed value(s) on the stack instead. */
+        def replaceCreationOps(): Unit = {
+          for (creation <- allCreations) creation.loadInitialValues match {
+            case None =>
+              toDelete ++= creation.allInsns
+            case Some(ops) =>
+              toReplace(creation.valuesConsumer) = ops
+              toDelete ++= (creation.allInsns - creation.valuesConsumer)
+          }
+        }
+
+        /**
+         * Replace a value extraction operation. For a single-value box, the extraction operation can
+         * just be removed. An extraction from a multi-value box is replaced by POP operations for the
+         * non-used values, and an xSTORE / xLOAD for the extracted value. Example: tuple3._2 becomes
+         * POP; xSTORE n; POP; xLOAD n.
+         */
+        def replaceExtractionOps(): Unit = {
+          if (boxKind.boxedTypes.lengthCompare(1) == 0) {
+            // fast path for single-value boxes
+            allConsumers.foreach(extraction => extraction.postExtractionAdaptationOps(boxKind.boxedTypes.head) match {
+              case Nil =>
+                toDelete ++= extraction.allInsns
+              case ops =>
+                toReplace(extraction.consumer) = ops
+                toDelete ++= extraction.allInsns - extraction.consumer
+            })
+          } else {
+            for (extraction <- allConsumers) {
+              val valueIndex = boxKind.extractedValueIndex(extraction)
+              val replacementOps = if (valueIndex == 0) {
+                // The last value is on the stack top, so the values are popped in reverse order.
+                val pops = boxKind.boxedTypes.tail.reverseMap(t => getPop(t.getSize))
+                pops ::: extraction.postExtractionAdaptationOps(boxKind.boxedTypes.head)
+              } else {
+                // While consuming the stack (top first), the extracted value is stored into a fresh
+                // local; it is loaded again once all values of the box have been consumed.
+                val extractedType = boxKind.boxedTypes(valueIndex)
+                val resultSlot = getLocal(extractedType.getSize)
+                val consumeStack = boxKind.boxedTypes.zipWithIndex reverseMap {
+                  case (tp, i) =>
+                    if (i == valueIndex) new VarInsnNode(tp.getOpcode(ISTORE), resultSlot)
+                    else getPop(tp.getSize)
+                }
+                val loadOps = new VarInsnNode(extractedType.getOpcode(ILOAD), resultSlot) :: extraction.postExtractionAdaptationOps(extractedType)
+                consumeStack ::: loadOps
+              }
+              toReplace(extraction.consumer) = replacementOps
+              toDelete ++= extraction.allInsns - extraction.consumer
+            }
+          }
+        }
+
+        checkCopyOpReplacements(allCreations, allConsumers, boxKind.boxedTypes, nextLocal, prodCons) match {
+          case Some((replacements, nextCopyOpLocal, reTypedLocals)) =>
+            toReplace ++= replacements
+            updateLocalVariableTypes(reTypedLocals)
+            nextLocal = nextCopyOpLocal
+            replaceCreationOps()
+            replaceExtractionOps()
+            // Conservative (safe) value for stack growth: every frame that had the box (one slot) on
+            // the stack now holds all the individual values instead. maxStack is measured in slots and
+            // long / double values take two, so the growth is the summed value size minus one.
+            maxStackGrowth += boxKind.boxedTypes.map(_.getSize).sum - 1
+          case None =>
+        }
+      }
+
+      val it = method.instructions.iterator
+      while (it.hasNext) {
+        val insn = it.next()
+        if (!knownHandled(insn)) BoxKind.valueCreationKind(insn, prodCons) match {
+          case Some((boxCreation, boxKind)) =>
+            allCreationsConsumers(boxCreation, boxKind, prodCons) match {
+              case Some((allCreations, allConsumers)) =>
+                val (escapingConsumers, boxConsumers) = allConsumers.partition(_.isEscaping)
+                if (boxConsumers.nonEmpty) {
+                  for (c <- allCreations) knownHandled ++= c.allInsns
+                  for (e <- allConsumers) knownHandled ++= e.allInsns
+
+                  val hasEscaping = escapingConsumers.nonEmpty
+                  val hasWrite = allConsumers.exists(_.isWrite)
+                  if (!hasEscaping && !hasWrite) {
+                    // M2 -- see doc comment in the beginning of this file
+                    // If both M1 and M2 can be applied, we prefer M2 because it doesn't introduce new locals.
+                    replaceBoxOperationsMultipleCreations(allCreations, allConsumers, boxKind)
+                  } else if (allCreations.size == 1 && (!hasEscaping || !boxKind.isMutable)) {
+                    // M1 -- see doc comment in the beginning of this file
+                    replaceBoxOperationsSingleCreation(allCreations.head, allConsumers, boxKind, keepBox = hasEscaping)
+                  }
+                }
+
+              case None =>
+            }
+
+          case None =>
+        }
+      }
+
+      def removeFromCallGraph(insn: AbstractInsnNode): Unit = insn match {
+        case mi: MethodInsnNode => callGraph.removeCallsite(mi, method)
+        case _ =>
+      }
+
+      for ((location, ops) <- toInsertBefore; op <- ops)
+        method.instructions.insertBefore(location, op)
+
+      for ((oldOp, newOps) <- toReplace) {
+        for (newOp <- newOps) method.instructions.insertBefore(oldOp, newOp)
+        method.instructions.remove(oldOp)
+        removeFromCallGraph(oldOp)
+      }
+
+      for (op <- toDelete) {
+        method.instructions.remove(op)
+        removeFromCallGraph(op)
+      }
+
+      method.maxLocals = nextLocal
+      method.maxStack += maxStackGrowth
+      toInsertBefore.nonEmpty || toReplace.nonEmpty || toDelete.nonEmpty
+    }
+  }
+
+  /**
+   * Given a box creation operation
+   *   - find all ultimate consumers for the produced value. then:
+   *     - for all consumed values, find all producer operations. check that all are box creations
+   *       - recurse until reaching a fixpoint
+   *
+   * Returns the box creations and box consumers, or `None` if a producer is not a box creation or a
+   * non-box consumer is found after a second creation. The consumers may contain [[EscapingConsumer]]s
+   * even if there are multiple creations; the caller handles this (and does not eliminate the box).
+   */
+  def allCreationsConsumers(initialCreation: BoxCreation, boxKind: BoxKind, prodCons: ProdConsAnalyzer): Option[(Set[BoxCreation], Set[BoxConsumer])] = {
+    var creations = Set(initialCreation)
+    var consumers = Set.empty[BoxConsumer]
+    // checks that all producers of the box consumed by `boxConsumer` are box creations, follows new ones
+    def addCreations(boxConsumer: BoxConsumer): Boolean = {
+      val newProds = boxConsumer.boxProducers(prodCons).filterNot(prod => creations.exists(_.producer == prod))
+      newProds.forall(prod => boxKind.checkBoxCreation(prod, prodCons) match {
+        case Some(boxCreation) =>
+          creations += boxCreation
+          addBoxConsumers(boxCreation)
+
+        case _ => false
+      })
+    }
+    // classifies all ultimate consumers of the box created by `creation`, follows new box consumers
+    def addBoxConsumers(creation: BoxCreation): Boolean = {
+      val newCons = creation.boxConsumers(prodCons, ultimate = true).filterNot(cons => consumers.exists(_.consumer == cons))
+      newCons.forall(cons => boxKind.checkBoxConsumer(cons, prodCons) match {
+        case Some(boxConsumer) =>
+          consumers += boxConsumer
+          addCreations(boxConsumer)
+
+        case _ =>
+          creations.size <= 1 && {
+            // If there's a single box creation, the box operations can still be rewritten
+            consumers += EscapingConsumer(cons)
+            true
+          }
+      })
+    }
+
+    if (addBoxConsumers(initialCreation)) Some((creations, consumers))
+    else None
+  }
+
+  /**
+   * Takes two sets `initialProds` and `finalCons` such that all boxes produced by the first set
+   * are only consumed by an operation in the second set.
+   *
+   * Returns a map that replaces copy operations (ALOAD / ASTORE) between the producers and
+   * consumers with corresponding copy operations for the values stored in the box, the next free
+   * local variable slot, and the existing slots that are re-used for an unboxed value of a new type.
+   *
+   * Examples:
+   *   - for an Integer box, an ASTORE x is simply replaced by ISTORE x
+   *   - for a pair of two references, an ASTORE x is replaced by `ASTORE x1; ASTORE x2` where x1
+   *     and x2 are fresh locals
+   *
+   * Not all copy operations can be supported: DUP only works for single-value boxes, the more
+   * exotic copy operations (DUP2_X2) are not supported (note that Scalac never emits them). If a
+   * copy operation cannot be replaced, this method returns `None`.
+   */
+  def checkCopyOpReplacements(initialProds: Set[BoxCreation], finalCons: Set[BoxConsumer], valueTypes: List[Type], nextLocal: Int, prodCons: ProdConsAnalyzer): Option[(Map[AbstractInsnNode, List[AbstractInsnNode]], Int, Map[Int, Type])] = {
+    var replacements = Map.empty[AbstractInsnNode, List[AbstractInsnNode]]
+    var reTypedLocals = Map.empty[Int, Type]
+    // New slots are allocated from nextCopyOpLocal; a single size-1 value re-uses the slot of the box.
+    var nextCopyOpLocal = nextLocal
+    val newLocalsMap: mutable.LongMap[List[(Type, Int)]] = mutable.LongMap.empty
+    def newLocals(index: Int) = newLocalsMap.getOrElseUpdate(index, valueTypes match {
+      case List(t) if t.getSize == 1 =>
+        reTypedLocals += index -> t
+        List((t, index))
+      case _ => valueTypes.map(t => {
+        val newIndex = nextCopyOpLocal
+        nextCopyOpLocal += t.getSize
+        (t, newIndex)
+      })
+    })
+
+    var replaceOK = true
+    val copyOps = new CopyOpsIterator(initialProds, finalCons, prodCons)
+    while (replaceOK && copyOps.hasNext) copyOps.next() match {
+      case vi: VarInsnNode =>
+        val isLoad = vi.getOpcode == ALOAD
+        val typedVarOp = (tp: (Type, Int)) => {
+          val opc = tp._1.getOpcode(if (isLoad) ILOAD else ISTORE)
+          new VarInsnNode(opc, tp._2)
+        }
+        val locs = newLocals(vi.`var`)
+        replacements += vi -> (if (isLoad) locs.map(typedVarOp) else locs.reverseMap(typedVarOp))
+      // DUP of a single-value box: kept for a size-1 value, DUP2 for a size-2 value. Others: give up.
+      case copyOp =>
+        if (copyOp.getOpcode == DUP && valueTypes.lengthCompare(1) == 0) {
+          if (valueTypes.head.getSize == 2)
+            replacements += copyOp -> List(new InsnNode(DUP2))
+        } else {
+          replaceOK = false
+        }
+    }
+    if (replaceOK) Some((replacements, nextCopyOpLocal, reTypedLocals)) else None
+  }
+
+  /**
+   * For a set of box creation operations and a corresponding set of box consumer operations, this
+   * iterator returns all copy operations (load, store, dup) in between; each one is returned once.
+   */
+  class CopyOpsIterator(initialCreations: Set[BoxCreation], finalCons: Set[BoxConsumer], prodCons: ProdConsAnalyzer) extends Iterator[AbstractInsnNode] {
+    private val queue = mutable.Queue.empty[AbstractInsnNode] ++ initialCreations.iterator.flatMap(_.boxConsumers(prodCons, ultimate = false))
+
+    // a single copy operation can consume multiple producers: val a = if (b) box(1) else box(2).
+    // the `ASTORE a` has two producers (the two box operations). we need to handle it only once.
+    private val visited = mutable.Set.empty[AbstractInsnNode]
+    // the final consumers end the search: they are never returned and their outputs are not followed
+    private val boxConsumingOps = finalCons.map(_.consumer)
+
+    @tailrec private def advanceToNextCopyOp(): Unit = {
+      if (queue.nonEmpty) {
+        val h = queue.front
+        if (visited(h) || boxConsumingOps(h)) {
+          queue.dequeue()
+          advanceToNextCopyOp()
+        }
+      }
+    }
+
+    def hasNext: Boolean = {
+      advanceToNextCopyOp()
+      queue.nonEmpty
+    }
+
+    def next(): AbstractInsnNode = {
+      advanceToNextCopyOp()
+      val r = queue.dequeue()
+      visited += r
+      queue ++= prodCons.consumersOfOutputsFrom(r)
+      r
+    }
+  }
+
+  trait BoxKind { // one kind of box, with the checks that recognize its creations and consumers
+    def checkBoxCreation(insn: AbstractInsnNode, prodCons: ProdConsAnalyzer): Option[BoxCreation]
+    def checkBoxConsumer(insn: AbstractInsnNode, prodCons: ProdConsAnalyzer): Option[BoxConsumer]
+    def boxedTypes: List[Type] // type(s) of the value(s) stored in the box
+    def extractedValueIndex(extraction: BoxConsumer): Int // index into `boxedTypes`
+    def isMutable: Boolean
+  }
+
+  object BoxKind {
+    /** Tries the known box kinds in order: primitive boxes first, then Refs, then tuples. */
+    def valueCreationKind(insn: AbstractInsnNode, prodCons: ProdConsAnalyzer): Option[(BoxCreation, BoxKind)] =
+      PrimitiveBox.checkPrimitiveBox(insn, None, prodCons)
+        .orElse(Ref.checkRefCreation(insn, None, prodCons))
+        .orElse(Tuple.checkTupleCreation(insn, None, prodCons))
+
+    /**
+     * Recognizes the standard object construction pattern that starts at `newOp`:
+     *
+     *   NEW T
+     *   DUP
+     *   [load constructor args]
+     *   INVOKESPECIAL T.init
+     *
+     * The result is the `DUP` and the constructor call, provided that the pattern is closed in
+     * itself: no branch may enter or leave it. The producers / consumers have to confirm that
+     *   - `DUP` is the only consumer of `NEW`, and `NEW` is the only producer consumed by `DUP`
+     *   - exactly one constructor call of `T` consumes `DUP`
+     *   - `DUP` is the only producer for the receiver of that constructor call
+     *   - the value on the stack top after the constructor call is produced by `DUP` only
+     *   - that value has the same consumers as `DUP`, leaving aside the constructor call
+     *
+     * In all other cases, the result is `None`.
+     */
+    def checkInstanceCreation(newOp: TypeInsnNode, prodCons: ProdConsAnalyzer): Option[(InsnNode, MethodInsnNode)] = {
+      val newConsumers = prodCons.consumersOfOutputsFrom(newOp)
+      val onlyDup =
+        if (newConsumers.size == 1 && newConsumers.head.getOpcode == DUP) Some(newConsumers.head.asInstanceOf[InsnNode])
+        else None
+      for {
+        dup <- onlyDup
+        if prodCons.producersForInputsOf(dup) == Set(newOp)
+        dupConsumers = prodCons.consumersOfOutputsFrom(dup)
+        constructorCalls = dupConsumers collect {
+          case call: MethodInsnNode if call.name == GenBCode.INSTANCE_CONSTRUCTOR_NAME && call.owner == newOp.desc => call
+        }
+        if constructorCalls.size == 1
+        init = constructorCalls.head
+        receiverIndex = prodCons.frameAt(init).stackTop - Type.getArgumentTypes(init.desc).length
+        if prodCons.producersForValueAt(init, receiverIndex) == Set(dup)
+        following = init.getNext
+        topAfterInit = prodCons.frameAt(following).stackTop
+        if prodCons.consumersOfValueAt(following, topAfterInit) == (dupConsumers - init)
+        if prodCons.producersForValueAt(following, topAfterInit) == Set(dup)
+      } yield (dup, init)
+    }
+
+    /**
+     * For an invocation `mi` of a method on Predef, looks up the producer of the receiver.
+     *
+     * The producer is returned if
+     *   - it is the only producer of the receiver,
+     *   - it is a load of Predef (as decided by `isPredefLoad`), and
+     *   - `mi` is the only consumer of the loaded value.
+     * Otherwise the result is `None`.
+     */
+    def checkReceiverPredefLoad(mi: MethodInsnNode, prodCons: ProdConsAnalyzer): Option[AbstractInsnNode] = {
+      val receiverIndex = prodCons.frameAt(mi).stackTop - Type.getArgumentTypes(mi.desc).length
+      val receiverProducers = prodCons.producersForValueAt(mi, receiverIndex)
+      if (receiverProducers.size != 1) None
+      else Some(receiverProducers.head).filter(load => isPredefLoad(load) && prodCons.consumersOfOutputsFrom(load) == Set(mi))
+    }
+  }
+
+  case class PrimitiveBox(boxedType: Type, boxClass: InternalName) extends BoxKind {
+    import PrimitiveBox.{checkPrimitiveBox, checkPrimitiveUnbox}
+    def isMutable = false
+    def boxedTypes: List[Type] = boxedType :: Nil
+    def extractedValueIndex(extraction: BoxConsumer): Int = 0 // a primitive box holds a single value
+    def checkBoxCreation(insn: AbstractInsnNode, prodCons: ProdConsAnalyzer): Option[BoxCreation] = checkPrimitiveBox(insn, Some(this), prodCons).map { case (creation, _) => creation }
+    def checkBoxConsumer(insn: AbstractInsnNode, prodCons: ProdConsAnalyzer): Option[BoxConsumer] = checkPrimitiveUnbox(insn, this, prodCons)
+  }
+
+  object PrimitiveBox {
+    private def boxedType(mi: MethodInsnNode) = Type.getArgumentTypes(mi.desc)(0)
+    // a constructor call names the box class as its owner, a factory method as its return type
+    private def boxClass(mi: MethodInsnNode) =
+      if (mi.name != GenBCode.INSTANCE_CONSTRUCTOR_NAME) Type.getReturnType(mi.desc).getInternalName
+      else mi.owner
+
+    /** Checks if `insn` creates a primitive box: a box factory call, an auto-boxing call on Predef or
+     *  a `NEW` that starts a box constructor invocation. If `expectedKind` is defined, the box class
+     *  has to agree with it; otherwise the kind is derived from the instruction. */
+    def checkPrimitiveBox(insn: AbstractInsnNode, expectedKind: Option[PrimitiveBox], prodCons: ProdConsAnalyzer): Option[(BoxCreation, PrimitiveBox)] = {
+      // mi is either a box factory or a box constructor invocation
+      def checkKind(mi: MethodInsnNode): Option[PrimitiveBox] =
+        if (expectedKind.isEmpty) Some(PrimitiveBox(boxedType(mi), boxClass(mi)))
+        else expectedKind.filter(_.boxClass == boxClass(mi))
+      insn match {
+        case factory: MethodInsnNode if isScalaBox(factory) || isJavaBox(factory) =>
+          checkKind(factory).map(kind => (StaticFactory(factory, loadInitialValues = None), kind))
+        case autoBox: MethodInsnNode if isPredefAutoBox(autoBox) =>
+          BoxKind.checkReceiverPredefLoad(autoBox, prodCons).flatMap(predefLoad =>
+            checkKind(autoBox).map(kind => (ModuleFactory(predefLoad, autoBox), kind)))
+        case newOp: TypeInsnNode if newOp.getOpcode == NEW =>
+          BoxKind.checkInstanceCreation(newOp, prodCons) match {
+            case Some((dupOp, initCall)) if isPrimitiveBoxConstructor(initCall) =>
+              checkKind(initCall).map(kind => (InstanceCreation(newOp, dupOp, initCall), kind))
+            case _ => None
+          }
+        case _ => None
+      }
+    }
+
+    /** Checks if `insn` unboxes a box of the given `kind`, i.e., returns a value of `kind.boxedType`. */
+    def checkPrimitiveUnbox(insn: AbstractInsnNode, kind: PrimitiveBox, prodCons: ProdConsAnalyzer): Option[BoxConsumer] = {
+      def returnsBoxedType(mi: MethodInsnNode) = kind.boxedType == Type.getReturnType(mi.desc)
+      insn match {
+        case unbox: MethodInsnNode if (isScalaUnbox(unbox) || isJavaUnbox(unbox)) && returnsBoxedType(unbox) =>
+          Some(StaticGetterOrInstanceRead(unbox))
+        case unbox: MethodInsnNode if isPredefAutoUnbox(unbox) && returnsBoxedType(unbox) =>
+          BoxKind.checkReceiverPredefLoad(unbox, prodCons).map(predefLoad => ModuleGetter(predefLoad, unbox))
+        case _ => None
+      }
+    }
+  }
+
+  /**
+   * Box kind for a reference cell of class `refClass` holding one value of type `boxedType`.
+   * The only kind in this file that reports itself as mutable.
+   */
+  case class Ref(boxedType: Type, refClass: InternalName) extends BoxKind {
+    import Ref._
+    def checkBoxCreation(insn: AbstractInsnNode, prodCons: ProdConsAnalyzer): Option[BoxCreation] =
+      checkRefCreation(insn, Some(this), prodCons).map { case (creation, _) => creation }
+    def checkBoxConsumer(insn: AbstractInsnNode, prodCons: ProdConsAnalyzer): Option[BoxConsumer] =
+      checkRefConsumer(insn, this, prodCons)
+    def boxedTypes: List[Type] = boxedType :: Nil
+    // a ref holds a single value, so every extraction refers to index 0
+    def extractedValueIndex(extraction: BoxConsumer): Int = 0
+    def isMutable = true
+  }
+
+  /**
+   * Helpers identifying the instructions that create a ref (factory call or `NEW` + constructor)
+   * and the accesses to its `elem` field.
+   */
+  object Ref {
+    private def boxedType(mi: MethodInsnNode): Type = runtimeRefClassBoxedType(mi.owner)
+    private def refClass(mi: MethodInsnNode): InternalName = mi.owner
+    /** Instructions loading the zero value of the type held by the ref class owning `refZeroCall`. */
+    private def loadZeroValue(refZeroCall: MethodInsnNode): List[AbstractInsnNode] =
+      loadZeroForTypeSort(runtimeRefClassBoxedType(refZeroCall.owner).getSort) :: Nil
+
+    /**
+     * If `insn` creates a ref, returns the creation together with its kind. When `expectedKind` is
+     * defined, only refs of the same ref class are accepted and the expected kind itself is
+     * returned; otherwise the kind is derived from the invocation.
+     */
+    def checkRefCreation(insn: AbstractInsnNode, expectedKind: Option[Ref], prodCons: ProdConsAnalyzer): Option[(BoxCreation, Ref)] = {
+      def kindOf(call: MethodInsnNode): Option[Ref] =
+        if (expectedKind.isEmpty) Some(Ref(boxedType(call), refClass(call)))
+        else expectedKind.filter(_.refClass == refClass(call))
+
+      insn match {
+        case call: MethodInsnNode if isRefCreate(call) =>
+          kindOf(call).map(kind => (StaticFactory(call, loadInitialValues = None), kind))
+
+        case call: MethodInsnNode if isRefZero(call) =>
+          // the zero factory takes no value from the stack, so the initial value is loaded explicitly
+          kindOf(call).map(kind => (StaticFactory(call, loadInitialValues = Some(loadZeroValue(call))), kind))
+
+        case alloc: TypeInsnNode if alloc.getOpcode == NEW =>
+          BoxKind.checkInstanceCreation(alloc, prodCons) match {
+            case Some((dupOp, initCall)) if isRuntimeRefConstructor(initCall) =>
+              kindOf(initCall).map(kind => (InstanceCreation(alloc, dupOp, initCall), kind))
+            case _ => None
+          }
+
+        case _ => None
+      }
+    }
+
+    /**
+     * If `insn` reads or writes the `elem` field of the ref class of `kind`, returns the
+     * corresponding consumer.
+     */
+    def checkRefConsumer(insn: AbstractInsnNode, kind: Ref, prodCons: ProdConsAnalyzer): Option[BoxConsumer] = insn match {
+      case access: FieldInsnNode if access.owner == kind.refClass && access.name == "elem" =>
+        access.getOpcode match {
+          case GETFIELD => Some(StaticGetterOrInstanceRead(access))
+          case PUTFIELD => Some(StaticSetterOrInstanceWrite(access))
+          case _        => None
+        }
+
+      case _ => None
+    }
+  }
+
+  /**
+   * Box kind for a tuple of class `tupleClass` whose components have the types `boxedTypes`.
+   */
+  case class Tuple(boxedTypes: List[Type], tupleClass: InternalName) extends BoxKind {
+    import Tuple._
+    def checkBoxCreation(insn: AbstractInsnNode, prodCons: ProdConsAnalyzer): Option[BoxCreation] =
+      checkTupleCreation(insn, Some(this), prodCons).map { case (creation, _) => creation }
+    def checkBoxConsumer(insn: AbstractInsnNode, prodCons: ProdConsAnalyzer): Option[BoxConsumer] =
+      checkTupleExtraction(insn, this, prodCons)
+    /** Zero-based index of the component read by `extraction`, derived from the getter's name. */
+    def extractedValueIndex(extraction: BoxConsumer): Int = {
+      val getterName = extraction match {
+        case StaticGetterOrInstanceRead(getter: MethodInsnNode) => getter.name
+        case PrimitiveBoxingGetter(getter)                      => getter.name
+        case PrimitiveUnboxingGetter(getter, _)                 => getter.name
+        case _ => throw new AssertionError(s"Expected tuple getter, found $extraction")
+      }
+      tupleGetterIndex(getterName)
+    }
+    def isMutable = false
+  }
+
+  /**
+   * Helpers identifying tuple constructor invocations and tuple getter calls.
+   */
+  object Tuple {
+    /** Types of the tuple components: the argument types in the descriptor of `mi`. */
+    private def boxedTypes(mi: MethodInsnNode): List[Type] = Type.getArgumentTypes(mi.desc).toList
+    private def tupleClass(mi: MethodInsnNode): InternalName = mi.owner
+
+    /**
+     * If `insn` is the `NEW` of a tuple instance creation, returns the creation together with its
+     * kind. When `expectedKind` is defined, only tuples of the same class are accepted and the
+     * expected kind itself is returned; otherwise the kind is derived from the constructor call.
+     */
+    def checkTupleCreation(insn: AbstractInsnNode, expectedKind: Option[Tuple], prodCons: ProdConsAnalyzer): Option[(BoxCreation, Tuple)] = {
+      def kindOf(initCall: MethodInsnNode): Option[Tuple] =
+        if (expectedKind.isEmpty) Some(Tuple(boxedTypes(initCall), tupleClass(initCall)))
+        else expectedKind.filter(_.tupleClass == tupleClass(initCall))
+
+      insn match {
+        // no need to check for TupleN.apply: the compiler transforms case companion apply calls to constructor invocations
+        case alloc: TypeInsnNode if alloc.getOpcode == NEW =>
+          BoxKind.checkInstanceCreation(alloc, prodCons) match {
+            case Some((dupOp, initCall)) if isTupleConstructor(initCall) =>
+              kindOf(initCall).map(kind => (InstanceCreation(alloc, dupOp, initCall), kind))
+            case _ => None
+          }
+
+        case _ => None
+      }
+    }
+
+    private val specializedTupleClassR = "scala/Tuple[12]\\$mc[IJDCZ]{1,2}\\$sp".r
+    private def isSpecializedTupleClass(className: InternalName) = specializedTupleClassR.pattern.matcher(className).matches
+
+    private val specializedTupleGetterR = "_[12]\\$mc[IJDCZ]\\$sp".r
+    private def isSpecializedTupleGetter(call: MethodInsnNode) = specializedTupleGetterR.pattern.matcher(call.name).matches
+
+    private val tupleGetterR = "_\\d\\d?".r
+    private def isTupleGetter(call: MethodInsnNode) = tupleGetterR.pattern.matcher(call.name).matches
+
+    /**
+     * If `insn` invokes a getter on the tuple class of `kind`, returns the corresponding consumer.
+     *
+     * For a specialized tuple class, the getter may be invoked on the specialized class or on the
+     * class named by the prefix before the first `$`; a specialized getter is a plain read, a
+     * generic getter boxes the value. For a non-specialized tuple class the owner has to match
+     * exactly; a specialized getter unboxes the value, a generic getter is a plain read.
+     */
+    def checkTupleExtraction(insn: AbstractInsnNode, kind: Tuple, prodCons: ProdConsAnalyzer): Option[BoxConsumer] = {
+      val expectedTupleClass = kind.tupleClass
+      insn match {
+        case call: MethodInsnNode =>
+          val receiverClass = call.owner
+          if (isSpecializedTupleClass(expectedTupleClass)) {
+            def genericClass = expectedTupleClass.substring(0, expectedTupleClass.indexOf('$'))
+            if (receiverClass != expectedTupleClass && receiverClass != genericClass) None
+            else if (isSpecializedTupleGetter(call)) Some(StaticGetterOrInstanceRead(call))
+            else if (isTupleGetter(call)) Some(PrimitiveBoxingGetter(call))
+            else None
+          }
+          else if (expectedTupleClass != receiverClass) None
+          else if (isSpecializedTupleGetter(call)) Some(PrimitiveUnboxingGetter(call, Type.getReturnType(call.desc)))
+          else if (isTupleGetter(call)) Some(StaticGetterOrInstanceRead(call))
+          else None
+
+        case _ => None
+      }
+    }
+
+    private val getterIndexPattern = "_(\\d{1,2}).*".r
+    /** Zero-based component index encoded in a getter name; a name of a different shape fails with a MatchError. */
+    def tupleGetterIndex(getterName: String) = getterName match {
+      case getterIndexPattern(oneBased) => oneBased.toInt - 1
+    }
+  }
+
+  // TODO: add more
+  // case class ValueClass(valueClass: Type, valueType: Type) extends BoxKind
+
+  /**
+   * The instruction(s) creating a box: a static factory call, a method call on a module, or an
+   * instance creation (`NEW`, dup, constructor call).
+   */
+  sealed trait BoxCreation {
+    // to support box creation operations that don't consume an initial value from the stack, e.g., IntRef.zero
+    val loadInitialValues: Option[List[AbstractInsnNode]]
+
+    /**
+     * The instruction whose output is the box value; for instance creations, the `NEW` operation.
+     */
+    def producer: AbstractInsnNode
+
+    /**
+     * The invocation that takes the boxed values as arguments; for instance creations, the `init`
+     * call.
+     */
+    def valuesConsumer: MethodInsnNode = this match {
+      case factory: StaticFactory     => factory.producer
+      case factory: ModuleFactory     => factory.producer
+      case creation: InstanceCreation => creation.initCall
+    }
+
+    /** All instructions that make up this box creation. */
+    def allInsns: Set[AbstractInsnNode] = this match {
+      case factory: StaticFactory     => Set(factory.producer)
+      case factory: ModuleFactory     => Set(factory.moduleLoad, factory.producer)
+      case creation: InstanceCreation => Set(creation.newOp, creation.dupOp, creation.initCall)
+    }
+
+    /**
+     * The consumers of the box produced by this box creation. With `ultimate` set, the final
+     * consumers are returned (e.g., an unbox operation), otherwise the direct ones (e.g., a store
+     * operation). For an instance creation, the constructor call is not part of the result.
+     */
+    def boxConsumers(prodCons: ProdConsAnalyzer, ultimate: Boolean): Set[AbstractInsnNode] = {
+      def consumersOf(start: AbstractInsnNode) =
+        if (ultimate) prodCons.ultimateConsumersOfOutputsFrom(start)
+        else prodCons.consumersOfOutputsFrom(start)
+
+      this match {
+        // for the non-transitive case (ultimate == false), it's important to start at the `dupOp`,
+        // not the `newOp` - look at the BoxCreation as a black box, get its consumers.
+        case InstanceCreation(_, dupOp, initCall) => consumersOf(dupOp) - initCall
+        case _                                    => consumersOf(producer)
+      }
+    }
+  }
+
+  /**
+   * Box creation by a single method invocation `producer`. If `loadInitialValues` is defined, it
+   * holds the instructions loading the initial value(s) for a factory that takes none from the stack.
+   */
+  case class StaticFactory(producer: MethodInsnNode, loadInitialValues: Option[List[AbstractInsnNode]]) extends BoxCreation
+  /** Box creation by invoking `producer` on a module; `moduleLoad` is the instruction loading that receiver. */
+  case class ModuleFactory(moduleLoad: AbstractInsnNode, producer: MethodInsnNode) extends BoxCreation {
+    // the boxed value is always taken from the stack
+    val loadInitialValues: Option[List[AbstractInsnNode]] = None
+  }
+  /** Box creation by `NEW` (`newOp`), the following `dupOp` and the constructor invocation `initCall`. */
+  case class InstanceCreation(newOp: TypeInsnNode, dupOp: InsnNode, initCall: MethodInsnNode) extends BoxCreation {
+    // the `NEW` operation is what puts the box on the stack
+    def producer = newOp
+    // the boxed value(s) are the constructor arguments, taken from the stack
+    val loadInitialValues: Option[List[AbstractInsnNode]] = None
+  }
+
+  /**
+   * An instruction consuming a box: a read or write of the boxed value, or an unknown (escaping)
+   * use.
+   */
+  sealed trait BoxConsumer {
+    val consumer: AbstractInsnNode
+
+    /** All instructions that make up this consumer; a module getter includes the load of its receiver. */
+    def allInsns: Set[AbstractInsnNode] = this match {
+      case getter: ModuleGetter => Set(getter.moduleLoad, getter.consumer)
+      case _                    => Set(consumer)
+    }
+
+    /**
+     * The initial producers of the box value consumed by this box consumer
+     */
+    def boxProducers(prodCons: ProdConsAnalyzer): Set[AbstractInsnNode] = {
+      val top = prodCons.frameAt(consumer).stackTop
+      // for a write the box sits one slot below the stack top, otherwise it is the top value
+      val boxSlot = top - (if (isWrite) 1 else 0)
+      prodCons.initialProducersForValueAt(consumer, boxSlot)
+    }
+
+    def isEscaping = this.isInstanceOf[EscapingConsumer]
+
+    def isWrite = this.isInstanceOf[StaticSetterOrInstanceWrite]
+
+    /**
+     * If this box consumer extracts a boxed value and applies a conversion, this method returns
+     * equivalent conversion operations. For example, invoking `_1$mcI$sp` on a non-specialized
+     * `Tuple2` extracts the Integer value and unboxes it.
+     */
+    def postExtractionAdaptationOps(typeOfExtractedValue: Type): List[AbstractInsnNode] = this match {
+      case _: PrimitiveBoxingGetter        => getScalaBox(typeOfExtractedValue) :: Nil
+      case unboxing: PrimitiveUnboxingGetter => getScalaUnbox(unboxing.unboxedPrimitive) :: Nil
+      case _                               => Nil
+    }
+  }
+
+  /** A plain read of the boxed value: static extractor (BoxesRunTime.unboxToInt) or GETFIELD or getter invocation */
+  case class StaticGetterOrInstanceRead(consumer: AbstractInsnNode) extends BoxConsumer
+  /** A getter that boxes the returned value, e.g., `Tuple2$mcII$sp._1`; eliminating it requires a box operation on the extracted value */
+  case class PrimitiveBoxingGetter(consumer: MethodInsnNode) extends BoxConsumer
+  /** A getter that unboxes the returned value to `unboxedPrimitive`, e.g., `Tuple2._1$mcI$sp` */
+  case class PrimitiveUnboxingGetter(consumer: MethodInsnNode, unboxedPrimitive: Type) extends BoxConsumer
+  /** An extractor method in a Scala module, e.g., `Predef.Integer2int`; `moduleLoad` is the instruction loading the receiver */
+  case class ModuleGetter(moduleLoad: AbstractInsnNode, consumer: MethodInsnNode) extends BoxConsumer
+  /** PUTFIELD or setter invocation; the only consumer kind for which `isWrite` is true */
+  case class StaticSetterOrInstanceWrite(consumer: AbstractInsnNode) extends BoxConsumer
+  /** An unknown box consumer; the only consumer kind for which `isEscaping` is true */
+  case class EscapingConsumer(consumer: AbstractInsnNode) extends BoxConsumer
+}
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/ByteCodeRepository.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/ByteCodeRepository.scala
index a5b85e54e7..f2ff73c44d 100644
--- a/src/compiler/scala/tools/nsc/backend/jvm/opt/ByteCodeRepository.scala
+++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/ByteCodeRepository.scala
@@ -9,13 +9,12 @@ package opt
 
 import scala.tools.asm
 import asm.tree._
-import scala.collection.convert.decorateAsScala._
+import scala.collection.JavaConverters._
+import scala.collection.{concurrent, mutable}
 import scala.tools.asm.Attribute
 import scala.tools.nsc.backend.jvm.BackendReporting._
-import scala.tools.nsc.io.AbstractFile
-import scala.tools.nsc.util.ClassFileLookup
+import scala.tools.nsc.util.ClassPath
 import BytecodeUtils._
-import ByteCodeRepository._
 import BTypes.InternalName
 import java.util.concurrent.atomic.AtomicLong
 
@@ -24,58 +23,91 @@ import java.util.concurrent.atomic.AtomicLong
  * classpath. Parsed classes are cached in the `classes` map.
  *
  * @param classPath The compiler classpath where classfiles are searched and read from.
- * @param classes   Cache for parsed ClassNodes. Also stores the source of the bytecode:
- *                  [[Classfile]] if read from `classPath`, [[CompilationUnit]] if the bytecode
- *                  corresponds to a class being compiled.
- *                  The `Long` field encodes the age of the node in the map, which allows removing
- *                  old entries when the map grows too large.
- *                  For Java classes in mixed compilation, the map contains an error message: no
- *                  ClassNode is generated by the backend and also no classfile that could be parsed.
  */
-class ByteCodeRepository(val classPath: ClassFileLookup[AbstractFile], val isJavaSourceDefined: InternalName => Boolean, val classes: collection.concurrent.Map[InternalName, Either[ClassNotFound, (ClassNode, Source, Long)]]) {
+class ByteCodeRepository[BT <: BTypes](val classPath: ClassPath, val btypes: BT) {
+  import btypes._
+
+  /**
+   * Contains the ClassNodes and the canonical source file paths of the classes being compiled in
+   * the current compilation run.
+   */
+  val compilingClasses: concurrent.Map[InternalName, (ClassNode, String)] = recordPerRunCache(concurrent.TrieMap.empty)
+
+  /**
+   * Cache for parsed ClassNodes.
+   * The `Long` field encodes the age of the node in the map, which allows removing old entries when
+   * the map grows too large (see limitCacheSize).
+   * For Java classes in mixed compilation, the map contains an error message: no ClassNode is
+   * generated by the backend and also no classfile that could be parsed.
+   */
+  val parsedClasses: concurrent.Map[InternalName, Either[ClassNotFound, (ClassNode, Long)]] = recordPerRunCache(concurrent.TrieMap.empty)
 
   private val maxCacheSize = 1500
   private val targetSize   = 500
 
-  private val idCounter = new AtomicLong(0)
+  private object lruCounter extends AtomicLong(0l) with collection.generic.Clearable {
+    def clear(): Unit = { this.set(0l) }
+  }
+  recordPerRunCache(lruCounter)
 
   /**
    * Prevent the code repository from growing too large. Profiling reveals that the average size
    * of a ClassNode is about 30 kb. I observed having 17k+ classes in the cache, i.e., 500 mb.
-   *
-   * We can only remove classes with `Source == Classfile`, those can be parsed again if requested.
    */
   private def limitCacheSize(): Unit = {
-    if (classes.count(c => c._2.isRight && c._2.right.get._2 == Classfile) > maxCacheSize) {
-      val removeId = idCounter.get - targetSize
-      val toRemove = classes.iterator.collect({
-        case (name, Right((_, Classfile, id))) if id < removeId => name
-      }).toList
-      toRemove foreach classes.remove
+    if (parsedClasses.size > maxCacheSize) {
+      // OK if multiple threads get here
+      val minimalLRU = parsedClasses.valuesIterator.collect({
+        case Right((_, lru)) => lru
+      }).toList.sorted(Ordering.Long.reverse).drop(targetSize).headOption.getOrElse(Long.MaxValue)
+      parsedClasses retain {
+        case (_, Right((_, lru))) => lru > minimalLRU
+        case _ => false
+      }
     }
   }
 
-  def add(classNode: ClassNode, source: Source) = {
-    classes(classNode.name) = Right((classNode, source, idCounter.incrementAndGet()))
+  def add(classNode: ClassNode, sourceFilePath: Option[String]) = sourceFilePath.filter(_ != "<no file>") match {
+    case Some(path) => compilingClasses(classNode.name) = (classNode, path) // known source file: a class of the current run
+    case None       => parsedClasses(classNode.name) = Right((classNode, lruCounter.incrementAndGet())) // otherwise it goes to the LRU cache
+  }
+
+  private def parsedClassNode(internalName: InternalName): Either[ClassNotFound, ClassNode] = { // looks up `parsedClasses`, parses the class on a miss
+    val r = parsedClasses.get(internalName) match {
+      case Some(l @ Left(_)) => l // a cached failure is returned as is
+      case Some(r @ Right((classNode, _))) =>
+        parsedClasses(internalName) = Right((classNode, lruCounter.incrementAndGet())) // cache hit: give the entry a fresh LRU stamp
+        r
+      case None =>
+        limitCacheSize() // shrink the cache, if necessary, before adding an entry
+        val res = parseClass(internalName).map((_, lruCounter.incrementAndGet()))
+        parsedClasses(internalName) = res // a failure (Left) is stored as well
+        res
+    }
+    r.map(_._1) // drop the LRU stamp
   }
 
   /**
-   * The class node and source for an internal name. If the class node is not yet available, it is
-   * parsed from the classfile on the compile classpath.
+   * The class node and source file path (if the class is being compiled) for an internal name. If
+   * the class node is not yet available, it is parsed from the classfile on the compile classpath.
    */
-  def classNodeAndSource(internalName: InternalName): Either[ClassNotFound, (ClassNode, Source)] = {
-    val r = classes.getOrElseUpdate(internalName, {
-      limitCacheSize()
-      parseClass(internalName).map((_, Classfile, idCounter.incrementAndGet()))
-    })
-    r.map(v => (v._1, v._2))
+  def classNodeAndSourceFilePath(internalName: InternalName): Either[ClassNotFound, (ClassNode, Option[String])] = {
+    // a class of the current run comes with its source file path, a parsed class has none
+    compilingClasses.get(internalName).fold(parsedClassNode(internalName).map(node => (node, Option.empty[String]))) {
+      case (node, path) => Right((node, Some(path)))
+    }
   }
 
   /**
    * The class node for an internal name. If the class node is not yet available, it is parsed from
    * the classfile on the compile classpath.
    */
-  def classNode(internalName: InternalName): Either[ClassNotFound, ClassNode] = classNodeAndSource(internalName).map(_._1)
+  def classNode(internalName: InternalName): Either[ClassNotFound, ClassNode] = {
+    // classes of the current run are looked up first, then the cache of parsed classes
+    compilingClasses.get(internalName).fold(parsedClassNode(internalName)) {
+      case (node, _) => Right(node)
+    }
+  }
 
   /**
    * The field node for a field matching `name` and `descriptor`, accessed in class `classInternalName`.
@@ -86,7 +118,6 @@ class ByteCodeRepository(val classPath: ClassFileLookup[AbstractFile], val isJav
    */
   def fieldNode(classInternalName: InternalName, name: String, descriptor: String): Either[FieldNotFound, (FieldNode, InternalName)] = {
     def fieldNodeImpl(parent: InternalName): Either[FieldNotFound, (FieldNode, InternalName)] = {
-      def msg = s"The field node $name$descriptor could not be found in class $classInternalName or any of its superclasses."
       classNode(parent) match {
         case Left(e)  => Left(FieldNotFound(name, descriptor, classInternalName, Some(e)))
         case Right(c) =>
@@ -105,33 +136,135 @@ class ByteCodeRepository(val classPath: ClassFileLookup[AbstractFile], val isJav
    * The method node for a method matching `name` and `descriptor`, accessed in class `ownerInternalNameOrArrayDescriptor`.
    * The declaration of the method may be in one of the parents.
    *
+   * Note that the JVM spec performs method lookup in two steps: resolution and selection.
+   *
+   * Method resolution, defined in jvms-5.4.3.3 and jvms-5.4.3.4, is the first step and is identical
+   * for all invocation styles (virtual, interface, special, static). If C is the receiver class
+   * in the invocation instruction:
+   *   1 find a matching method (name and descriptor) in C
+   *   2 then in C's superclasses
+   *   3 then find the maximally-specific matching superinterface methods, succeed if there's a
+   *     single non-abstract one. static and private methods in superinterfaces are not considered.
+   *   4 then pick a random non-static, non-private superinterface method.
+   *   5 then fail.
+   *
+   * Note that for an `invokestatic` instruction, a method reference `B.m` may resolve to `A.m`, if
+   * class `B` doesn't specify a matching method `m`, but the parent `A` does.
+   *
+   * Selection depends on the invocation style and is defined in jvms-6.5.
+   *   - invokestatic: invokes the resolved method
+   *   - invokevirtual / invokeinterface: searches for an override of the resolved method starting
+   *     at the dynamic receiver type. the search procedure is basically the same as in resolution,
+   *     but it fails at 4 instead of picking a superinterface method at random.
+   *   - invokespecial: if C is the receiver in the invocation instruction, searches for an override
+   *     of the resolved method starting at
+   *       - the superclass of the current class, if C is a superclass of the current class
+   *       - C otherwise
+   *     again, the search procedure is the same.
+   *
+   * In the method here we implement method *resolution*. Whether or not the returned method is
+   * actually invoked at runtime depends on the invocation instruction and the class hierarchy, so
+   * the users (e.g. the inliner) have to be aware of method selection.
+   *
+   * Note that the returned method may be abstract (ACC_ABSTRACT), native (ACC_NATIVE) or signature
+   * polymorphic (methods `invoke` and `invokeExact` in class `MethodHandle`).
+   *
    * @return The [[MethodNode]] of the requested method and the [[InternalName]] of its declaring
-   *         class, or an error message if the method could not be found.
+   *         class, or an error message if the method could not be found. An error message is also
+   *         returned if method resolution results in multiple default methods.
    */
   def methodNode(ownerInternalNameOrArrayDescriptor: String, name: String, descriptor: String): Either[MethodNotFound, (MethodNode, InternalName)] = {
-    // on failure, returns a list of class names that could not be found on the classpath
-    def methodNodeImpl(ownerInternalName: InternalName): Either[List[ClassNotFound], (MethodNode, InternalName)] = {
-      classNode(ownerInternalName) match {
-        case Left(e)  => Left(List(e))
-        case Right(c) =>
-          c.methods.asScala.find(m => m.name == name && m.desc == descriptor) match {
-            case Some(m) => Right((m, ownerInternalName))
-            case None    => findInParents(Option(c.superName) ++: c.interfaces.asScala.toList, Nil)
-          }
+    def findMethod(c: ClassNode): Option[MethodNode] = c.methods.asScala.find(m => m.name == name && m.desc == descriptor)
+
+    // https://docs.oracle.com/javase/specs/jvms/se8/html/jvms-2.html#jvms-2.9: "In Java SE 8, the only
+    // signature polymorphic methods are the invoke and invokeExact methods of the class MethodHandle."
+    def isSignaturePolymorphic(owner: InternalName) = owner == coreBTypes.jliMethodHandleRef.internalName && (name == "invoke" || name == "invokeExact")
+
+    // Searches `owner` and then its superclasses. If `owner` is an interface, the first iteration searches the interface itself;
+    // if that fails, the recursive invocation checks the superclass (which is Object) with `publicInstanceOnly == true`.
+    // This is specified in jvms-5.4.3.4: interface method resolution only returns public, non-static methods of Object.
+    def findInSuperClasses(owner: ClassNode, publicInstanceOnly: Boolean = false): Either[ClassNotFound, Option[(MethodNode, InternalName)]] = {
+      findMethod(owner) match {
+        case Some(m) if !publicInstanceOnly || (isPublicMethod(m) && !isStaticMethod(m)) => Right(Some((m, owner.name)))
+        case _ => // no match, or a match rejected by `publicInstanceOnly` (e.g. a protected method of Object): keep searching, don't fail with a MatchError
+          if (isSignaturePolymorphic(owner.name)) Right(Some((owner.methods.asScala.find(_.name == name).get, owner.name)))
+          else if (owner.superName == null) Right(None)
+          else classNode(owner.superName).flatMap(findInSuperClasses(_, isInterface(owner)))
       }
     }
 
-    // find the MethodNode in one of the parent classes
-    def findInParents(parents: List[InternalName], failedClasses: List[ClassNotFound]): Either[List[ClassNotFound], (MethodNode, InternalName)] = parents match {
-      case x :: xs => methodNodeImpl(x).left.flatMap(failed => findInParents(xs, failed ::: failedClasses))
-      case Nil     => Left(failedClasses)
+    def findInInterfaces(initialOwner: ClassNode): Either[ClassNotFound, Option[(MethodNode, InternalName)]] = {
+      val visited = mutable.Set.empty[InternalName]
+      val found = mutable.ListBuffer.empty[(MethodNode, ClassNode)]
+      // Collects in `found` the matching methods of all direct and indirect superinterfaces of `owner`; yields the first class that cannot be found.
+      def findIn(owner: ClassNode): Option[ClassNotFound] = {
+        for (i <- owner.interfaces.asScala if !visited(i)) classNode(i) match {
+          case Left(e) => return Some(e)
+          case Right(c) =>
+            visited += i
+            // private and static methods are excluded, see jvms-5.4.3.3
+            for (m <- findMethod(c) if !isPrivateMethod(m) && !isStaticMethod(m)) found += ((m, c))
+            val recursionResult = findIn(c)
+            if (recursionResult.isDefined) return recursionResult
+        }
+        None
+      }
+      // A superinterface that cannot be found makes `found` incomplete: the failure is reported as a Left below instead of being dropped.
+      val lookupFailure = findIn(initialOwner)
+
+      def result =
+        if (found.size <= 1) found.headOption
+        else {
+          val maxSpecific = found.filterNot({
+            case (method, owner) =>
+              isAbstractMethod(method) || {
+                val ownerTp = classBTypeFromClassNode(owner)
+                found exists {
+                  case (other, otherOwner) =>
+                    (other ne method) && {
+                      val otherTp = classBTypeFromClassNode(otherOwner)
+                      otherTp.isSubtypeOf(ownerTp).get
+                    }
+                }
+              }
+          })
+          // (*) note that if there's no single, non-abstract, maximally-specific method, the jvm
+          // method resolution (jvms-5.4.3.3) returns any of the non-private, non-static parent
+          // methods at random (abstract or concrete).
+          // we chose not to do this here, to prevent the inliner from potentially inlining the
+          // wrong method. in other words, we guarantee that a concrete method is only returned if
+          // it resolves deterministically.
+          // however, there may be multiple abstract methods inherited. in this case we *do* want
+          // to return a result to allow performing accessibility checks in the inliner. note that
+          // for accessibility it does not matter which of these methods is returned, as they are all
+          // non-private (i.e., public, protected is not possible, jvms-4.1).
+          // the remaining case (when there's no max-specific method, but some non-abstract one)
+          // does not occur in bytecode generated by scalac or javac. we return no result in this
+          // case. this may at worst prevent some optimizations from happening.
+          if (maxSpecific.size == 1) maxSpecific.headOption
+          else if (found.forall(p => isAbstractMethod(p._1))) found.headOption // (*)
+          else None
+        }
+      lookupFailure.toLeft(result.map(p => (p._1, p._2.name))) // `result` is only evaluated when the lookup succeeded
     }
 
     // In a MethodInsnNode, the `owner` field may be an array descriptor, for example when invoking `clone`. We don't have a method node to return in this case.
-    if (ownerInternalNameOrArrayDescriptor.charAt(0) == '[')
-      Left(MethodNotFound(name, descriptor, ownerInternalNameOrArrayDescriptor, Nil))
-    else
-      methodNodeImpl(ownerInternalNameOrArrayDescriptor).left.map(MethodNotFound(name, descriptor, ownerInternalNameOrArrayDescriptor, _))
+    if (ownerInternalNameOrArrayDescriptor.charAt(0) == '[') {
+      Left(MethodNotFound(name, descriptor, ownerInternalNameOrArrayDescriptor, None))
+    } else {
+      def notFound(cnf: Option[ClassNotFound]) = Left(MethodNotFound(name, descriptor, ownerInternalNameOrArrayDescriptor, cnf))
+      val res: Either[ClassNotFound, Option[(MethodNode, InternalName)]] = classNode(ownerInternalNameOrArrayDescriptor).flatMap(c =>
+        findInSuperClasses(c) flatMap {
+          case None => findInInterfaces(c)
+          case res => Right(res)
+        }
+      )
+      res match {
+        case Left(e) => notFound(Some(e))
+        case Right(None) => notFound(None)
+        case Right(Some(res)) => Right(res)
+      }
+    }
   }
 
   private def parseClass(internalName: InternalName): Either[ClassNotFound, ClassNode] = {
@@ -157,17 +290,7 @@ class ByteCodeRepository(val classPath: ClassFileLookup[AbstractFile], val isJav
       classNode
     } match {
       case Some(node) => Right(node)
-      case None       => Left(ClassNotFound(internalName, isJavaSourceDefined(internalName)))
+      case None       => Left(ClassNotFound(internalName, javaDefinedClasses(internalName)))
     }
   }
 }
-
-object ByteCodeRepository {
-  /**
-   * The source of a ClassNode in the ByteCodeRepository. Can be either [[CompilationUnit]] if the
-   * class is being compiled or [[Classfile]] if the class was parsed from the compilation classpath.
-   */
-  sealed trait Source
-  object CompilationUnit extends Source
-  object Classfile extends Source
-}
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/BytecodeUtils.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/BytecodeUtils.scala
index 7aadd2c466..bfd92cac5c 100644
--- a/src/compiler/scala/tools/nsc/backend/jvm/opt/BytecodeUtils.scala
+++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/BytecodeUtils.scala
@@ -8,28 +8,29 @@ package backend.jvm
 package opt
 
 import scala.annotation.{tailrec, switch}
+
 import scala.collection.mutable
 import scala.reflect.internal.util.Collections._
 import scala.tools.asm.commons.CodeSizeEvaluator
 import scala.tools.asm.tree.analysis._
-import scala.tools.asm.{MethodWriter, ClassWriter, Label, Opcodes, Type}
+import scala.tools.asm.{Label, Type}
+import scala.tools.asm.Opcodes._
 import scala.tools.asm.tree._
 import GenBCode._
-import scala.collection.convert.decorateAsScala._
-import scala.collection.convert.decorateAsJava._
-import scala.tools.nsc.backend.jvm.BTypes._
+import scala.collection.JavaConverters._
+import scala.tools.nsc.backend.jvm.analysis.InstructionStackEffect
 
 object BytecodeUtils {
 
   // http://docs.oracle.com/javase/specs/jvms/se7/html/jvms-4.html#jvms-4.9.1
-  final val maxJVMMethodSize         = 65535
+  final val maxJVMMethodSize = 65535
 
   // 5% margin, more than enough for the instructions added by the inliner (store / load args, null check for instance methods)
   final val maxMethodSizeAfterInline = maxJVMMethodSize - (maxJVMMethodSize / 20)
 
   object Goto {
     def unapply(instruction: AbstractInsnNode): Option[JumpInsnNode] = {
-      if (instruction.getOpcode == Opcodes.GOTO) Some(instruction.asInstanceOf[JumpInsnNode])
+      if (instruction.getOpcode == GOTO) Some(instruction.asInstanceOf[JumpInsnNode])
       else None
     }
   }
@@ -49,8 +50,9 @@ object BytecodeUtils {
   }
 
   object VarInstruction {
-    def unapply(instruction: AbstractInsnNode): Option[VarInsnNode] = {
-      if (isVarInstruction(instruction)) Some(instruction.asInstanceOf[VarInsnNode])
+    def unapply(instruction: AbstractInsnNode): Option[(AbstractInsnNode, Int)] = {
+      if (isLoadStoreOrRet(instruction)) Some((instruction, instruction.asInstanceOf[VarInsnNode].`var`))
+      else if (instruction.getOpcode == IINC) Some((instruction, instruction.asInstanceOf[IincInsnNode].`var`))
       else None
     }
 
@@ -59,30 +61,46 @@ object BytecodeUtils {
   def isJumpNonJsr(instruction: AbstractInsnNode): Boolean = {
     val op = instruction.getOpcode
     // JSR is deprecated in classfile version 50, disallowed in 51. historically, it was used to implement finally.
-    op == Opcodes.GOTO || isConditionalJump(instruction)
+    op == GOTO || isConditionalJump(instruction)
   }
 
   def isConditionalJump(instruction: AbstractInsnNode): Boolean = {
     val op = instruction.getOpcode
-    (op >= Opcodes.IFEQ && op <= Opcodes.IF_ACMPNE) || op == Opcodes.IFNULL || op == Opcodes.IFNONNULL
+    (op >= IFEQ && op <= IF_ACMPNE) || op == IFNULL || op == IFNONNULL
   }
 
   def isReturn(instruction: AbstractInsnNode): Boolean = {
     val op = instruction.getOpcode
-    op >= Opcodes.IRETURN && op <= Opcodes.RETURN
+    op >= IRETURN && op <= RETURN
   }
 
   def isLoad(instruction: AbstractInsnNode): Boolean = {
     val op = instruction.getOpcode
-    op >= Opcodes.ILOAD  && op <= Opcodes.ALOAD
+    op >= ILOAD  && op <= ALOAD
   }
 
   def isStore(instruction: AbstractInsnNode): Boolean = {
     val op = instruction.getOpcode
-    op >= Opcodes.ISTORE && op <= Opcodes.ASTORE
+    op >= ISTORE && op <= ASTORE
+  }
+
+  def isLoadStoreOrRet(instruction: AbstractInsnNode): Boolean = isLoad(instruction) || isStore(instruction) || instruction.getOpcode == RET
+
+  def isLoadOrStore(instruction: AbstractInsnNode): Boolean = isLoad(instruction) || isStore(instruction)
+
+  def isNonVirtualCall(instruction: AbstractInsnNode): Boolean = {
+    val op = instruction.getOpcode
+    op == INVOKESPECIAL || op == INVOKESTATIC
   }
 
-  def isVarInstruction(instruction: AbstractInsnNode): Boolean = isLoad(instruction) || isStore(instruction)
+  def isVirtualCall(instruction: AbstractInsnNode): Boolean = {
+    val op = instruction.getOpcode
+    op == INVOKEVIRTUAL || op == INVOKEINTERFACE
+  }
+
+  def isCall(instruction: AbstractInsnNode): Boolean = {
+    isNonVirtualCall(instruction) || isVirtualCall(instruction)
+  }
 
   def isExecutable(instruction: AbstractInsnNode): Boolean = instruction.getOpcode >= 0
 
@@ -90,27 +108,40 @@ object BytecodeUtils {
     methodNode.name == INSTANCE_CONSTRUCTOR_NAME || methodNode.name == CLASS_CONSTRUCTOR_NAME
   }
 
-  def isStaticMethod(methodNode: MethodNode): Boolean = (methodNode.access & Opcodes.ACC_STATIC) != 0
+  def isPublicMethod(methodNode: MethodNode): Boolean = (methodNode.access & ACC_PUBLIC) != 0
 
-  def isAbstractMethod(methodNode: MethodNode): Boolean = (methodNode.access & Opcodes.ACC_ABSTRACT) != 0
+  def isPrivateMethod(methodNode: MethodNode): Boolean = (methodNode.access & ACC_PRIVATE) != 0
 
-  def isSynchronizedMethod(methodNode: MethodNode): Boolean = (methodNode.access & Opcodes.ACC_SYNCHRONIZED) != 0
+  def isStaticMethod(methodNode: MethodNode): Boolean = (methodNode.access & ACC_STATIC) != 0
 
-  def isNativeMethod(methodNode: MethodNode): Boolean = (methodNode.access & Opcodes.ACC_NATIVE) != 0
+  def isAbstractMethod(methodNode: MethodNode): Boolean = (methodNode.access & ACC_ABSTRACT) != 0
 
-  def isFinalClass(classNode: ClassNode): Boolean = (classNode.access & Opcodes.ACC_FINAL) != 0
+  def isSynchronizedMethod(methodNode: MethodNode): Boolean = (methodNode.access & ACC_SYNCHRONIZED) != 0
 
-  def isFinalMethod(methodNode: MethodNode): Boolean = (methodNode.access & (Opcodes.ACC_FINAL | Opcodes.ACC_PRIVATE | Opcodes.ACC_STATIC)) != 0
+  def isNativeMethod(methodNode: MethodNode): Boolean = (methodNode.access & ACC_NATIVE) != 0
 
-  def isStrictfpMethod(methodNode: MethodNode): Boolean = (methodNode.access & Opcodes.ACC_STRICT) != 0
+  def hasCallerSensitiveAnnotation(methodNode: MethodNode): Boolean = methodNode.visibleAnnotations != null && methodNode.visibleAnnotations.asScala.exists(_.desc == "Lsun/reflect/CallerSensitive;")
+
+  def isFinalClass(classNode: ClassNode): Boolean = (classNode.access & ACC_FINAL) != 0
+
+  def isInterface(classNode: ClassNode): Boolean = (classNode.access & ACC_INTERFACE) != 0
+
+  def isFinalMethod(methodNode: MethodNode): Boolean = (methodNode.access & (ACC_FINAL | ACC_PRIVATE | ACC_STATIC)) != 0
+
+  def isStrictfpMethod(methodNode: MethodNode): Boolean = (methodNode.access & ACC_STRICT) != 0
 
   def isReference(t: Type) = t.getSort == Type.OBJECT || t.getSort == Type.ARRAY
 
-  def nextExecutableInstruction(instruction: AbstractInsnNode, alsoKeep: AbstractInsnNode => Boolean = Set()): Option[AbstractInsnNode] = {
-    var result = instruction
-    do { result = result.getNext }
-    while (result != null && !isExecutable(result) && !alsoKeep(result))
-    Option(result)
+  @tailrec def nextExecutableInstruction(insn: AbstractInsnNode, alsoKeep: AbstractInsnNode => Boolean = Set()): Option[AbstractInsnNode] = {
+    val next = insn.getNext
+    if (next == null || isExecutable(next) || alsoKeep(next)) Option(next)
+    else nextExecutableInstruction(next, alsoKeep)
+  }
+
+  @tailrec def nextExecutableInstructionOrLabel(insn: AbstractInsnNode): Option[AbstractInsnNode] = {
+    val next = insn.getNext
+    if (next == null || isExecutable(next) || next.isInstanceOf[LabelNode]) Option(next)
+    else nextExecutableInstructionOrLabel(next)
   }
 
   def sameTargetExecutableInstruction(a: JumpInsnNode, b: JumpInsnNode): Boolean = {
@@ -124,14 +155,14 @@ object BytecodeUtils {
   def removeJumpAndAdjustStack(method: MethodNode, jump: JumpInsnNode) {
     val instructions = method.instructions
     val op = jump.getOpcode
-    if ((op >= Opcodes.IFEQ && op <= Opcodes.IFGE) || op == Opcodes.IFNULL || op == Opcodes.IFNONNULL) {
+    if ((op >= IFEQ && op <= IFLE) || op == IFNULL || op == IFNONNULL) {
       instructions.insert(jump, getPop(1))
-    } else if ((op >= Opcodes.IF_ICMPEQ && op <= Opcodes.IF_ICMPLE) || op == Opcodes.IF_ACMPEQ || op == Opcodes.IF_ACMPNE) {
+    } else if ((op >= IF_ICMPEQ && op <= IF_ICMPLE) || op == IF_ACMPEQ || op == IF_ACMPNE) {
       instructions.insert(jump, getPop(1))
       instructions.insert(jump, getPop(1))
     } else {
       // we can't remove JSR: its execution does not only jump, it also adds a return address to the stack
-      assert(jump.getOpcode == Opcodes.GOTO)
+      assert(jump.getOpcode == GOTO)
     }
     instructions.remove(jump)
   }
@@ -148,37 +179,61 @@ object BytecodeUtils {
   }
 
   def negateJumpOpcode(jumpOpcode: Int): Int = (jumpOpcode: @switch) match {
-    case Opcodes.IFEQ      => Opcodes.IFNE
-    case Opcodes.IFNE      => Opcodes.IFEQ
+    case IFEQ      => IFNE
+    case IFNE      => IFEQ
+
+    case IFLT      => IFGE
+    case IFGE      => IFLT
 
-    case Opcodes.IFLT      => Opcodes.IFGE
-    case Opcodes.IFGE      => Opcodes.IFLT
+    case IFGT      => IFLE
+    case IFLE      => IFGT
 
-    case Opcodes.IFGT      => Opcodes.IFLE
-    case Opcodes.IFLE      => Opcodes.IFGT
+    case IF_ICMPEQ => IF_ICMPNE
+    case IF_ICMPNE => IF_ICMPEQ
 
-    case Opcodes.IF_ICMPEQ => Opcodes.IF_ICMPNE
-    case Opcodes.IF_ICMPNE => Opcodes.IF_ICMPEQ
+    case IF_ICMPLT => IF_ICMPGE
+    case IF_ICMPGE => IF_ICMPLT
 
-    case Opcodes.IF_ICMPLT => Opcodes.IF_ICMPGE
-    case Opcodes.IF_ICMPGE => Opcodes.IF_ICMPLT
+    case IF_ICMPGT => IF_ICMPLE
+    case IF_ICMPLE => IF_ICMPGT
 
-    case Opcodes.IF_ICMPGT => Opcodes.IF_ICMPLE
-    case Opcodes.IF_ICMPLE => Opcodes.IF_ICMPGT
+    case IF_ACMPEQ => IF_ACMPNE
+    case IF_ACMPNE => IF_ACMPEQ
 
-    case Opcodes.IF_ACMPEQ => Opcodes.IF_ACMPNE
-    case Opcodes.IF_ACMPNE => Opcodes.IF_ACMPEQ
+    case IFNULL    => IFNONNULL
+    case IFNONNULL => IFNULL
+  }
 
-    case Opcodes.IFNULL    => Opcodes.IFNONNULL
-    case Opcodes.IFNONNULL => Opcodes.IFNULL
+  def isSize2LoadOrStore(opcode: Int): Boolean = (opcode: @switch) match {
+    case LLOAD | DLOAD | LSTORE | DSTORE => true
+    case _ => false
   }
 
   def getPop(size: Int): InsnNode = {
-    val op = if (size == 1) Opcodes.POP else Opcodes.POP2
+    val op = if (size == 1) POP else POP2
     new InsnNode(op)
   }
 
-  def instructionResultSize(instruction: AbstractInsnNode) = InstructionResultSize(instruction)
+  def instructionResultSize(insn: AbstractInsnNode) = InstructionStackEffect.prod(InstructionStackEffect.forClassfile(insn))
+
+  /**
+   * An instruction pushing the zero value for a type sort: `0` for the primitive sorts, `null`
+   * for OBJECT and also for ARRAY, which `isReference` treats as a reference sort as well.
+   */
+  def loadZeroForTypeSort(sort: Int) = (sort: @switch) match {
+    case Type.BOOLEAN | Type.BYTE | Type.CHAR | Type.SHORT | Type.INT => new InsnNode(ICONST_0)
+    case Type.LONG                => new InsnNode(LCONST_0)
+    case Type.FLOAT               => new InsnNode(FCONST_0)
+    case Type.DOUBLE              => new InsnNode(DCONST_0)
+    case Type.ARRAY | Type.OBJECT => new InsnNode(ACONST_NULL)
+  }
+
+  /**
+   * The number of local variable slots used for parameters and for the `this` reference. ASM's argument size always counts one slot for `this`, so that slot is subtracted for static methods.
+   */
+  def parametersSize(methodNode: MethodNode): Int = {
+    (Type.getArgumentsAndReturnSizes(methodNode.desc) >> 2) - (if (isStaticMethod(methodNode)) 1 else 0)
+  }
 
   def labelReferences(method: MethodNode): Map[LabelNode, Set[AnyRef]] = {
     val res = mutable.Map.empty[LabelNode, Set[AnyRef]]
@@ -222,29 +277,6 @@ object BytecodeUtils {
     }
   }
 
-  /**
-   * In order to run an Analyzer, the maxLocals / maxStack fields need to be available. The ASM
-   * framework only computes these values during bytecode generation.
-   *
-   * Since there's currently no better way, we run a bytecode generator on the method and extract
-   * the computed values. This required changes to the ASM codebase:
-   *   - the [[MethodWriter]] class was made public
-   *   - accessors for maxLocals / maxStack were added to the MethodWriter class
-   *
-   * We could probably make this faster (and allocate less memory) by hacking the ASM framework
-   * more: create a subclass of MethodWriter with a /dev/null byteVector. Another option would be
-   * to create a separate visitor for computing those values, duplicating the functionality from the
-   * MethodWriter.
-   */
-  def computeMaxLocalsMaxStack(method: MethodNode): Unit = {
-    val cw = new ClassWriter(ClassWriter.COMPUTE_MAXS)
-    val excs = method.exceptions.asScala.toArray
-    val mw = cw.visitMethod(method.access, method.name, method.desc, method.signature, excs).asInstanceOf[MethodWriter]
-    method.accept(mw)
-    method.maxLocals = mw.getMaxLocals
-    method.maxStack = mw.getMaxStack
-  }
-
   def codeSizeOKForInlining(caller: MethodNode, callee: MethodNode): Boolean = {
     // Looking at the implementation of CodeSizeEvaluator, all instructions except tableswitch and
     // lookupswitch are <= 8 bytes. These should be rare enough for 8 to be an OK rough upper bound.
@@ -289,34 +321,36 @@ object BytecodeUtils {
   }
 
   /**
-   * Clone the instructions in `methodNode` into a new [[InsnList]], mapping labels according to
-   * the `labelMap`. Returns the new instruction list and a map from old to new instructions.
-   */
-  def cloneInstructions(methodNode: MethodNode, labelMap: Map[LabelNode, LabelNode]): (InsnList, Map[AbstractInsnNode, AbstractInsnNode]) = {
-    val javaLabelMap = labelMap.asJava
-    val result = new InsnList
-    var map = Map.empty[AbstractInsnNode, AbstractInsnNode]
-    for (ins <- methodNode.instructions.iterator.asScala) {
-      val cloned = ins.clone(javaLabelMap)
-      result add cloned
-      map += ((ins, cloned))
-    }
-    (result, map)
-  }
-
-  /**
    * Clone the local variable descriptors of `methodNode` and map their `start` and `end` labels
    * according to the `labelMap`.
    */
-  def cloneLocalVariableNodes(methodNode: MethodNode, labelMap: Map[LabelNode, LabelNode], prefix: String): List[LocalVariableNode] = {
-    methodNode.localVariables.iterator().asScala.map(localVariable => new LocalVariableNode(
-      prefix + localVariable.name,
-      localVariable.desc,
-      localVariable.signature,
-      labelMap(localVariable.start),
-      labelMap(localVariable.end),
-      localVariable.index
-    )).toList
+  def cloneLocalVariableNodes(methodNode: MethodNode, labelMap: Map[LabelNode, LabelNode], calleeMethodName: String, shift: Int): List[LocalVariableNode] = {
+    methodNode.localVariables.iterator().asScala.map(localVariable => {
+      val name =
+        if (calleeMethodName.length + localVariable.name.length < BTypes.InlinedLocalVariablePrefixMaxLenght) {
+          calleeMethodName + "_" + localVariable.name
+        } else {
+          // The name is too long: treat it as `meth1_meth2_..._varName` and shorten the method names.
+          // A limit of -1 keeps trailing empty strings, so `parts` is never empty (also for "_").
+          val parts = localVariable.name.split("_", -1).toVector
+          val (methNames, varName) = (calleeMethodName +: parts.init, parts.last)
+          // keep at least 5 characters per method name
+          val maxNumMethNames = BTypes.InlinedLocalVariablePrefixMaxLenght / 5
+          val usedMethNames =
+            if (methNames.length < maxNumMethNames) methNames
+            else {
+              val half = maxNumMethNames / 2
+              methNames.take(half) ++ methNames.takeRight(half)
+            }
+          val charsPerMethod = BTypes.InlinedLocalVariablePrefixMaxLenght / usedMethNames.length
+          usedMethNames.foldLeft("")((res, methName) => res + methName.take(charsPerMethod) + "_") + varName
+        }
+      // the cloned entry uses the mapped labels; its slot index is offset by `shift`
+      new LocalVariableNode(
+        name, localVariable.desc, localVariable.signature,
+        labelMap(localVariable.start), labelMap(localVariable.end),
+        localVariable.index + shift)
+    }).toList
+  }
 
   /**
@@ -344,23 +378,14 @@ object BytecodeUtils {
    * method which explains the issue with such phantom values.
    */
   def fixLoadedNothingOrNullValue(loadedType: Type, loadInstr: AbstractInsnNode, methodNode: MethodNode, bTypes: BTypes): Unit = {
-    if (loadedType == bTypes.coreBTypes.RT_NOTHING.toASMType) {
-      methodNode.instructions.insert(loadInstr, new InsnNode(Opcodes.ATHROW))
-    } else if (loadedType == bTypes.coreBTypes.RT_NULL.toASMType) {
-      methodNode.instructions.insert(loadInstr, new InsnNode(Opcodes.ACONST_NULL))
-      methodNode.instructions.insert(loadInstr, new InsnNode(Opcodes.POP))
+    if (loadedType == bTypes.coreBTypes.srNothingRef.toASMType) {
+      methodNode.instructions.insert(loadInstr, new InsnNode(ATHROW))
+    } else if (loadedType == bTypes.coreBTypes.srNullRef.toASMType) {
+      methodNode.instructions.insert(loadInstr, new InsnNode(ACONST_NULL))
+      methodNode.instructions.insert(loadInstr, new InsnNode(POP))
     }
   }
 
-  /**
-   * A wrapper to make ASM's Analyzer a bit easier to use.
-   */
-  class AsmAnalyzer[V <: Value](methodNode: MethodNode, classInternalName: InternalName, interpreter: Interpreter[V] = new BasicInterpreter) {
-    val analyzer = new Analyzer(interpreter)
-    analyzer.analyze(classInternalName, methodNode)
-    def frameAt(instruction: AbstractInsnNode): Frame[V] = analyzer.frameAt(instruction, methodNode)
-  }
-
   implicit class AnalyzerExtensions[V <: Value](val analyzer: Analyzer[V]) extends AnyVal {
     def frameAt(instruction: AbstractInsnNode, methodNode: MethodNode): Frame[V] = analyzer.getFrames()(methodNode.instructions.indexOf(instruction))
   }
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala
index 96455c0e38..a740ca525c 100644
--- a/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala
+++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala
@@ -7,182 +7,320 @@ package scala.tools.nsc
 package backend.jvm
 package opt
 
+import scala.collection.immutable.IntMap
 import scala.reflect.internal.util.{NoPosition, Position}
-import scala.tools.asm.tree.analysis.{Value, Analyzer, BasicInterpreter}
-import scala.tools.asm.{Opcodes, Type, Handle}
+import scala.tools.asm.{Handle, Opcodes, Type}
 import scala.tools.asm.tree._
-import scala.collection.concurrent
-import scala.collection.convert.decorateAsScala._
-import scala.tools.nsc.backend.jvm.BTypes.InternalName
+import scala.collection.{concurrent, mutable}
+import scala.collection.JavaConverters._
+import scala.tools.nsc.backend.jvm.BTypes.{InternalName, MethodInlineInfo}
 import scala.tools.nsc.backend.jvm.BackendReporting._
-import scala.tools.nsc.backend.jvm.analysis.{NotNull, NullnessAnalyzer}
-import ByteCodeRepository.{Source, CompilationUnit}
+import scala.tools.nsc.backend.jvm.analysis._
 import BytecodeUtils._
 
 class CallGraph[BT <: BTypes](val btypes: BT) {
   import btypes._
+  import backendUtils._
 
-  val callsites: concurrent.Map[MethodInsnNode, Callsite] = recordPerRunCache(concurrent.TrieMap.empty)
+  /**
+   * The call graph contains the callsites in the program being compiled.
+   *
+   * Indexing the call graph by the containing MethodNode and the invocation MethodInsnNode allows
+   * finding callsites efficiently. For example, an inlining heuristic might want to know all
+   * callsites within a callee method.
+   *
+   * Note that the call graph is not guaranteed to be complete: callsites may be missing. In
+   * particular, if a method is very large, none of its callsites are in the hash map.
+   * The reason is that adding a method to the call graph requires running an ASM analyzer, which
+   * can be too slow for very large methods.
+   *
+   * Note that call graph entries (Callsite instances) keep a reference to the invocation
+   * MethodInsnNode, which keeps all AbstractInsnNodes of the method reachable. Adding classes
+   * from the classpath to the call graph (in addition to classes being compiled) may prevent
+   * method instruction nodes from being GCd. The ByteCodeRepository has a fixed size cache for
+   * parsed ClassNodes - keeping all ClassNodes alive consumed too much memory.
+   * The call graph is less problematic because only methods being called are kept alive, not entire
+   * classes. But we should keep an eye on this.
+   */
+  val callsites: mutable.Map[MethodNode, Map[MethodInsnNode, Callsite]] = recordPerRunCache(concurrent.TrieMap.empty withDefaultValue Map.empty)
+
+  /**
+   * Closure instantiations in the program being compiled.
+   *
+   * Indexing closure instantiations by the containing MethodNode is beneficial for the closure
+   * optimizer: finding callsites to re-write requires running a producers-consumers analysis on
+   * the method. Here the closure instantiations are already grouped by method.
+   */
+  val closureInstantiations: mutable.Map[MethodNode, Map[InvokeDynamicInsnNode, ClosureInstantiation]] = recordPerRunCache(concurrent.TrieMap.empty withDefaultValue Map.empty)
+
+  /** Removes `invocation` from the callsites recorded for `methodNode`, returning its entry if present. */
+  def removeCallsite(invocation: MethodInsnNode, methodNode: MethodNode): Option[Callsite] = {
+    val before = callsites(methodNode)
+    val after = before - invocation
+    if (after.nonEmpty) callsites(methodNode) = after else callsites.remove(methodNode)
+    before.get(invocation)
+  }
+
+  /** Records `callsite` under its containing method, keyed by the invocation instruction. */
+  def addCallsite(callsite: Callsite): Unit = {
+    callsites(callsite.callsiteMethod) = callsites(callsite.callsiteMethod) + (callsite.callsiteInstruction -> callsite)
+  }
 
-  val closureInstantiations: concurrent.Map[InvokeDynamicInsnNode, ClosureInstantiation] = recordPerRunCache(concurrent.TrieMap.empty)
+  def containsCallsite(callsite: Callsite): Boolean = callsites(callsite.callsiteMethod) contains callsite.callsiteInstruction
+  def findCallSite(method: MethodNode, call: MethodInsnNode): Option[Callsite] = callsites.getOrElse(method, Map.empty).get(call)
+
+  /** Removes `indy` from the closure instantiations recorded for `methodNode`, returning its entry if present. */
+  def removeClosureInstantiation(indy: InvokeDynamicInsnNode, methodNode: MethodNode): Option[ClosureInstantiation] = {
+    val before = closureInstantiations(methodNode)
+    val after = before - indy
+    if (after.nonEmpty) closureInstantiations(methodNode) = after else closureInstantiations.remove(methodNode)
+    before.get(indy)
+  }
+
+  /** Records `closureInit` under its owner method, keyed by the closure's indy instruction. */
+  def addClosureInstantiation(closureInit: ClosureInstantiation): Unit = {
+    closureInstantiations(closureInit.ownerMethod) = closureInstantiations(closureInit.ownerMethod) + (closureInit.lambdaMetaFactoryCall.indy -> closureInit)
+  }
 
   def addClass(classNode: ClassNode): Unit = {
     val classType = classBTypeFromClassNode(classNode)
-    for {
-      m <- classNode.methods.asScala
-      (calls, closureInits) = analyzeCallsites(m, classType)
-    } {
-      calls foreach (callsite => callsites(callsite.callsiteInstruction) = callsite)
-      closureInits foreach (lmf => closureInstantiations(lmf.indy) = ClosureInstantiation(lmf, m, classType))
-    }
+    classNode.methods.asScala.foreach(addMethod(_, classType))
   }
 
-  /**
-   * Returns a list of callsites in the method, plus a list of closure instantiation indy instructions.
-   */
-  def analyzeCallsites(methodNode: MethodNode, definingClass: ClassBType): (List[Callsite], List[LambdaMetaFactoryCall]) = {
+  def addIfMissing(methodNode: MethodNode, definingClass: ClassBType): Unit = {
+    if (!callsites.contains(methodNode)) addMethod(methodNode, definingClass)
+  }
 
-    case class CallsiteInfo(safeToInline: Boolean, safeToRewrite: Boolean,
-                            annotatedInline: Boolean, annotatedNoInline: Boolean,
-                            warning: Option[CalleeInfoWarning])
+  def addMethod(methodNode: MethodNode, definingClass: ClassBType): Unit = {
+    if (!BytecodeUtils.isAbstractMethod(methodNode) && !BytecodeUtils.isNativeMethod(methodNode)) {
+      // TODO: run dataflow analyses to make the call graph more precise
+      //  - producers to get forwarded parameters (ForwardedParam)
+      //  - typeAnalysis for more precise argument types, more precise callee
+
+      // For now we run a NullnessAnalyzer. It is used to determine if the receiver of an instance
+      // call is known to be not-null, in which case we don't have to emit a null check when inlining.
+      // It is also used to get the stack height at the call site.
+
+      val analyzer = {
+        if (compilerSettings.optNullnessTracking && AsmAnalyzer.sizeOKForNullness(methodNode)) {
+          Some(new AsmAnalyzer(methodNode, definingClass.internalName, new NullnessAnalyzer(btypes, methodNode)))
+        } else if (AsmAnalyzer.sizeOKForBasicValue(methodNode)) {
+          Some(new AsmAnalyzer(methodNode, definingClass.internalName))
+        } else None
+      }
 
-    /**
-     * Analyze a callsite and gather meta-data that can be used for inlining decisions.
-     */
-    def analyzeCallsite(calleeMethodNode: MethodNode, calleeDeclarationClassBType: ClassBType, receiverTypeInternalName: InternalName, calleeSource: Source): CallsiteInfo = {
-      val methodSignature = calleeMethodNode.name + calleeMethodNode.desc
+      // if the method is too large to run an analyzer, it is not added to the call graph
+      if (analyzer.nonEmpty) {
+        val Some(a) = analyzer
+        def receiverNotNullByAnalysis(call: MethodInsnNode, numArgs: Int) = a.analyzer match {
+          case nullnessAnalyzer: NullnessAnalyzer =>
+            val frame = nullnessAnalyzer.frameAt(call, methodNode)
+            frame.getStack(frame.getStackSize - 1 - numArgs) eq NotNullValue
+          case _ => false
+        }
 
-      try {
-        // The inlineInfo.methodInfos of a ClassBType holds an InlineInfo for each method *declared*
-        // within a class (not for inherited methods). Since we already have the  classBType of the
-        // callee, we only check there for the methodInlineInfo, we should find it there.
-        calleeDeclarationClassBType.info.orThrow.inlineInfo.methodInfos.get(methodSignature) match {
-          case Some(methodInlineInfo) =>
-            val canInlineFromSource = compilerSettings.YoptInlineGlobal || calleeSource == CompilationUnit
+        var methodCallsites = Map.empty[MethodInsnNode, Callsite]
+        var methodClosureInstantiations = Map.empty[InvokeDynamicInsnNode, ClosureInstantiation]
+
+        // lazy so it is only computed if actually used by computeArgInfos
+        lazy val prodCons = new ProdConsAnalyzer(methodNode, definingClass.internalName)
+
+        methodNode.instructions.iterator.asScala foreach {
+          case call: MethodInsnNode if a.frameAt(call) != null => // skips over unreachable code
+            val callee: Either[OptimizerWarning, Callee] = for {
+              (method, declarationClass)                   <- byteCodeRepository.methodNode(call.owner, call.name, call.desc): Either[OptimizerWarning, (MethodNode, InternalName)]
+              (declarationClassNode, calleeSourceFilePath) <- byteCodeRepository.classNodeAndSourceFilePath(declarationClass): Either[OptimizerWarning, (ClassNode, Option[String])]
+            } yield {
+              val declarationClassBType = classBTypeFromClassNode(declarationClassNode)
+              val info = analyzeCallsite(method, declarationClassBType, call, calleeSourceFilePath)
+              import info._
+              Callee(
+                callee = method,
+                calleeDeclarationClass = declarationClassBType,
+                isStaticallyResolved = isStaticallyResolved,
+                sourceFilePath = sourceFilePath,
+                annotatedInline = annotatedInline,
+                annotatedNoInline = annotatedNoInline,
+                samParamTypes = info.samParamTypes,
+                calleeInfoWarning = warning)
+            }
 
-            val isAbstract = BytecodeUtils.isAbstractMethod(calleeMethodNode)
+            val argInfos = computeArgInfos(callee, call, prodCons)
 
-            // (1) A non-final method can be safe to inline if the receiver type is a final subclass. Example:
-            //   class A { @inline def f = 1 }; object B extends A; B.f  // can be inlined
-            //
-            // TODO: type analysis can render more calls statically resolved. Example:
-            //   new A.f  // can be inlined, the receiver type is known to be exactly A.
-            val isStaticallyResolved: Boolean = {
-              methodInlineInfo.effectivelyFinal ||
-                classBTypeFromParsedClassfile(receiverTypeInternalName).info.orThrow.inlineInfo.isEffectivelyFinal // (1)
+            val receiverNotNull = call.getOpcode == Opcodes.INVOKESTATIC || {
+              val numArgs = Type.getArgumentTypes(call.desc).length
+              receiverNotNullByAnalysis(call, numArgs)
             }
 
-            val isRewritableTraitCall = isStaticallyResolved && methodInlineInfo.traitMethodWithStaticImplementation
-
-            val warning = calleeDeclarationClassBType.info.orThrow.inlineInfo.warning.map(
-              MethodInlineInfoIncomplete(calleeDeclarationClassBType.internalName, calleeMethodNode.name, calleeMethodNode.desc, _))
-
-            // (1) For invocations of final trait methods, the callee isStaticallyResolved but also
-            //     abstract. Such a callee is not safe to inline - it needs to be re-written to the
-            //     static impl method first (safeToRewrite).
-            // (2) Final trait methods can be rewritten from the interface to the static implementation
-            //     method to enable inlining.
-            CallsiteInfo(
-              safeToInline      =
-                canInlineFromSource &&
-                isStaticallyResolved &&  // (1)
-                !isAbstract &&
-                !BytecodeUtils.isConstructor(calleeMethodNode) &&
-                !BytecodeUtils.isNativeMethod(calleeMethodNode),
-              safeToRewrite     = canInlineFromSource && isRewritableTraitCall, // (2)
-              annotatedInline   = methodInlineInfo.annotatedInline,
-              annotatedNoInline = methodInlineInfo.annotatedNoInline,
-              warning           = warning)
-
-          case None =>
-            val warning = MethodInlineInfoMissing(calleeDeclarationClassBType.internalName, calleeMethodNode.name, calleeMethodNode.desc, calleeDeclarationClassBType.info.orThrow.inlineInfo.warning)
-            CallsiteInfo(false, false, false, false, Some(warning))
+            methodCallsites += call -> Callsite(
+              callsiteInstruction = call,
+              callsiteMethod = methodNode,
+              callsiteClass = definingClass,
+              callee = callee,
+              argInfos = argInfos,
+              callsiteStackHeight = a.frameAt(call).getStackSize,
+              receiverKnownNotNull = receiverNotNull,
+              callsitePosition = callsitePositions.getOrElse(call, NoPosition),
+              annotatedInline = inlineAnnotatedCallsites(call),
+              annotatedNoInline = noInlineAnnotatedCallsites(call)
+            )
+
+          case LambdaMetaFactoryCall(indy, samMethodType, implMethod, instantiatedMethodType) if a.frameAt(indy) != null =>
+            val lmf = LambdaMetaFactoryCall(indy, samMethodType, implMethod, instantiatedMethodType)
+            val capturedArgInfos = computeCapturedArgInfos(lmf, prodCons)
+            methodClosureInstantiations += indy -> ClosureInstantiation(
+              lmf,
+              methodNode,
+              definingClass,
+              capturedArgInfos)
+
+          case _ =>
         }
-      } catch {
-        case Invalid(noInfo: NoClassBTypeInfo) =>
-          val warning = MethodInlineInfoError(calleeDeclarationClassBType.internalName, calleeMethodNode.name, calleeMethodNode.desc, noInfo)
-          CallsiteInfo(false, false, false, false, Some(warning))
+
+        callsites(methodNode) = methodCallsites
+        closureInstantiations(methodNode) = methodClosureInstantiations
       }
     }
+  }
 
-    // TODO: run dataflow analyses to make the call graph more precise
-    //  - producers to get forwarded parameters (ForwardedParam)
-    //  - typeAnalysis for more precise argument types, more precise callee
-
-    // For now we run a NullnessAnalyzer. It is used to determine if the receiver of an instance
-    // call is known to be not-null, in which case we don't have to emit a null check when inlining.
-    // It is also used to get the stack height at the call site.
-    localOpt.minimalRemoveUnreachableCode(methodNode, definingClass.internalName)
-
-    val analyzer: Analyzer[_ <: Value] = {
-      if (compilerSettings.YoptNullnessTracking) new NullnessAnalyzer
-      else new Analyzer(new BasicInterpreter)
+  def computeArgInfos(callee: Either[OptimizerWarning, Callee], callsiteInsn: MethodInsnNode, prodCons: => ProdConsAnalyzer): IntMap[ArgInfo] = {
+    if (callee.isLeft) IntMap.empty
+    else {
+      lazy val numArgs = Type.getArgumentTypes(callsiteInsn.desc).length + (if (callsiteInsn.getOpcode == Opcodes.INVOKESTATIC) 0 else 1)
+      argInfosForSams(callee.get.samParamTypes, callsiteInsn, numArgs, prodCons)
     }
-    analyzer.analyze(definingClass.internalName, methodNode)
+  }
 
-    def receiverNotNullByAnalysis(call: MethodInsnNode, numArgs: Int) = analyzer match {
-      case nullnessAnalyzer: NullnessAnalyzer =>
-        val frame = nullnessAnalyzer.frameAt(call, methodNode)
-        frame.getStack(frame.getStackSize - 1 - numArgs).nullness == NotNull
+  def computeCapturedArgInfos(lmf: LambdaMetaFactoryCall, prodCons: => ProdConsAnalyzer): IntMap[ArgInfo] = {
+    val capturedSams = capturedSamTypes(lmf)
+    val numCaptures = Type.getArgumentTypes(lmf.indy.desc).length
+    argInfosForSams(capturedSams, lmf.indy, numCaptures, prodCons)
+  }
 
-      case _ => false
+  private def argInfosForSams(sams: IntMap[ClassBType], consumerInsn: AbstractInsnNode, numConsumed: => Int, prodCons: => ProdConsAnalyzer): IntMap[ArgInfo] = {
+    // TODO: use type analysis instead of ProdCons - should be more efficient
+    // some random thoughts:
+    //  - assign special types to parameters and indy-lambda-functions to track them
+    //  - upcast should not change type flow analysis: don't lose information.
+    //  - can we do something about factory calls? Foo(x) for case class Foo gives a Foo.
+    //    inline the factory? analysis across method boundary?
+
+    // assign to a lazy val to prevent repeated evaluation of the by-name arg
+    lazy val prodConsI = prodCons
+    lazy val firstConsumedSlot = {
+      val consumerFrame = prodConsI.frameAt(consumerInsn)
+      consumerFrame.stackTop - numConsumed + 1
     }
-
-    val callsites = new collection.mutable.ListBuffer[Callsite]
-    val closureInstantiations = new collection.mutable.ListBuffer[LambdaMetaFactoryCall]
-
-    methodNode.instructions.iterator.asScala foreach {
-      case call: MethodInsnNode =>
-        val callee: Either[OptimizerWarning, Callee] = for {
-          (method, declarationClass)     <- byteCodeRepository.methodNode(call.owner, call.name, call.desc): Either[OptimizerWarning, (MethodNode, InternalName)]
-          (declarationClassNode, source) <- byteCodeRepository.classNodeAndSource(declarationClass): Either[OptimizerWarning, (ClassNode, Source)]
-          declarationClassBType          =  classBTypeFromClassNode(declarationClassNode)
-        } yield {
-          val CallsiteInfo(safeToInline, safeToRewrite, annotatedInline, annotatedNoInline, warning) = analyzeCallsite(method, declarationClassBType, call.owner, source)
-          Callee(
-            callee = method,
-            calleeDeclarationClass = declarationClassBType,
-            safeToInline = safeToInline,
-            safeToRewrite = safeToRewrite,
-            annotatedInline = annotatedInline,
-            annotatedNoInline = annotatedNoInline,
-            calleeInfoWarning = warning)
+    sams flatMap {
+      case (index, _) =>
+        val prods = prodConsI.initialProducersForValueAt(consumerInsn, firstConsumedSlot + index)
+        if (prods.size != 1) None
+        else {
+          val argInfo = prods.head match {
+            case LambdaMetaFactoryCall(_, _, _, _) => Some(FunctionLiteral)
+            case ParameterProducer(local)          => Some(ForwardedParam(local))
+            case _                                 => None
+          }
+          argInfo.map((index, _))
         }
+    }
+  }
 
-        val argInfos = if (callee.isLeft) Nil else {
-          // TODO: for now it's Nil, because we don't run any data flow analysis
-          // there's no point in using the parameter types, that doesn't add any information.
-          // NOTE: need to run the same analyses after inlining, to re-compute the argInfos for the
-          // new duplicated callsites, see Inliner.inline
-          Nil
-        }
+  def samParamTypes(methodNode: MethodNode, receiverType: ClassBType): IntMap[ClassBType] = {
+    val paramTypes = {
+      val params = Type.getMethodType(methodNode.desc).getArgumentTypes.map(t => bTypeForDescriptorOrInternalNameFromClassfile(t.getDescriptor))
+      val isStatic = BytecodeUtils.isStaticMethod(methodNode)
+      if (isStatic) params else receiverType +: params
+    }
+    samTypes(paramTypes)
+  }
 
-        val receiverNotNull = call.getOpcode == Opcodes.INVOKESTATIC || {
-          val numArgs = Type.getArgumentTypes(call.desc).length
-          receiverNotNullByAnalysis(call, numArgs)
-        }
+  def capturedSamTypes(lmf: LambdaMetaFactoryCall): IntMap[ClassBType] = {
+    val capturedTypes = Type.getArgumentTypes(lmf.indy.desc).map(t => bTypeForDescriptorOrInternalNameFromClassfile(t.getDescriptor))
+    samTypes(capturedTypes)
+  }
 
-        callsites += Callsite(
-          callsiteInstruction = call,
-          callsiteMethod = methodNode,
-          callsiteClass = definingClass,
-          callee = callee,
-          argInfos = argInfos,
-          callsiteStackHeight = analyzer.frameAt(call, methodNode).getStackSize,
-          receiverKnownNotNull = receiverNotNull,
-          callsitePosition = callsitePositions.getOrElse(call, NoPosition)
-        )
-
-      case LambdaMetaFactoryCall(indy, samMethodType, implMethod, instantiatedMethodType) =>
-        closureInstantiations += LambdaMetaFactoryCall(indy, samMethodType, implMethod, instantiatedMethodType)
-
-      case _ =>
-    }
+  private def samTypes(types: Array[BType]): IntMap[ClassBType] = {
+    var res = IntMap.empty[ClassBType]
+    for (i <- types.indices) {
+      types(i) match {
+        case c: ClassBType =>
+          if (c.info.get.inlineInfo.sam.isDefined) res = res.updated(i, c)
 
-    (callsites.toList, closureInstantiations.toList)
+        case _ =>
+      }
+    }
+    res
   }
 
   /**
+   * Just a named tuple used as return type of `analyzeCallsite`.
+   */
+  private case class CallsiteInfo(isStaticallyResolved: Boolean, sourceFilePath: Option[String],
+                                  annotatedInline: Boolean, annotatedNoInline: Boolean,
+                                  samParamTypes: IntMap[ClassBType],
+                                  warning: Option[CalleeInfoWarning])
+
+  /**
+   * Analyze a callsite and gather meta-data that can be used for inlining decisions.
+   */
+  private def analyzeCallsite(calleeMethodNode: MethodNode, calleeDeclarationClassBType: ClassBType, call: MethodInsnNode, calleeSourceFilePath: Option[String]): CallsiteInfo = {
+    val methodSignature = calleeMethodNode.name + calleeMethodNode.desc
+
+    try {
+      // The inlineInfo.methodInfos of a ClassBType holds an InlineInfo for each method *declared*
+      // within a class (not for inherited methods). Since we already have the classBType of the
+      // callee, we only check there for the methodInlineInfo; we should find it there.
+      calleeDeclarationClassBType.info.orThrow.inlineInfo.methodInfos.get(methodSignature) match {
+        case Some(methodInlineInfo) =>
+          val isAbstract = BytecodeUtils.isAbstractMethod(calleeMethodNode)
+
+          val receiverType = classBTypeFromParsedClassfile(call.owner)
+          // (1) A non-final method can be safe to inline if the receiver type is a final subclass. Example:
+          //   class A { @inline def f = 1 }; object B extends A; B.f  // can be inlined
+          //
+          // TODO: (1) doesn't cover the following example:
+          //   trait TravLike { def map = ... }
+          //   sealed trait List extends TravLike { ... } // assume map is not overridden
+          //   final case class :: / final case object Nil
+          //   (l: List).map // can be inlined
+          // we need to know that
+          //   - the receiver is sealed
+          //   - what are the children of the receiver
+          //   - all children are final
+          //   - none of the children overrides map
+          //
+          // TODO: type analysis can render more calls statically resolved. Example:
+          //   new A.f  // can be inlined, the receiver type is known to be exactly A.
+          val isStaticallyResolved: Boolean = {
+            isNonVirtualCall(call) || // SD-86: super calls (invokespecial) can be inlined -- TODO: check if that's still needed, and if it's correct: scala-dev#143
+            methodInlineInfo.effectivelyFinal ||
+              receiverType.info.orThrow.inlineInfo.isEffectivelyFinal // (1)
+          }
+
+          val warning = calleeDeclarationClassBType.info.orThrow.inlineInfo.warning.map(
+            MethodInlineInfoIncomplete(calleeDeclarationClassBType.internalName, calleeMethodNode.name, calleeMethodNode.desc, _))
+
+          CallsiteInfo(
+            isStaticallyResolved = isStaticallyResolved,
+            sourceFilePath       = calleeSourceFilePath,
+            annotatedInline      = methodInlineInfo.annotatedInline,
+            annotatedNoInline    = methodInlineInfo.annotatedNoInline,
+            samParamTypes        = samParamTypes(calleeMethodNode, receiverType),
+            warning              = warning)
+
+        case None =>
+          val warning = MethodInlineInfoMissing(calleeDeclarationClassBType.internalName, calleeMethodNode.name, calleeMethodNode.desc, calleeDeclarationClassBType.info.orThrow.inlineInfo.warning)
+          CallsiteInfo(false, None, false, false, IntMap.empty, Some(warning))
+      }
+    } catch {
+      case Invalid(noInfo: NoClassBTypeInfo) =>
+        val warning = MethodInlineInfoError(calleeDeclarationClassBType.internalName, calleeMethodNode.name, calleeMethodNode.desc, noInfo)
+        CallsiteInfo(false, None, false, false, IntMap.empty, Some(warning))
+    }
+  }
+
+  /**
    * A callsite in the call graph.
    *
    * @param callsiteInstruction The invocation instruction
@@ -197,21 +335,35 @@ class CallGraph[BT <: BTypes](val btypes: BT) {
    * @param callsitePosition    The source position of the callsite, used for inliner warnings.
    */
   final case class Callsite(callsiteInstruction: MethodInsnNode, callsiteMethod: MethodNode, callsiteClass: ClassBType,
-                            callee: Either[OptimizerWarning, Callee], argInfos: List[ArgInfo],
-                            callsiteStackHeight: Int, receiverKnownNotNull: Boolean, callsitePosition: Position) {
+                            callee: Either[OptimizerWarning, Callee], argInfos: IntMap[ArgInfo],
+                            callsiteStackHeight: Int, receiverKnownNotNull: Boolean, callsitePosition: Position,
+                            annotatedInline: Boolean, annotatedNoInline: Boolean) {
+    /**
+     * Contains callsites that were created during inlining by cloning this callsite. Used to find
+     * corresponding callsites when inlining post-inline requests.
+     */
+    val inlinedClones = mutable.Set.empty[ClonedCallsite]
+
+    // an annotation at the callsite takes precedence over an annotation at the definition site
+    def isInlineAnnotated = annotatedInline || (callee.get.annotatedInline && !annotatedNoInline)
+    def isNoInlineAnnotated = annotatedNoInline || (callee.get.annotatedNoInline && !annotatedInline)
+
     override def toString =
       "Invocation of" +
         s" ${callee.map(_.calleeDeclarationClass.internalName).getOrElse("?")}.${callsiteInstruction.name + callsiteInstruction.desc}" +
         s"@${callsiteMethod.instructions.indexOf(callsiteInstruction)}" +
-        s" in ${callsiteClass.internalName}.${callsiteMethod.name}"
+        s" in ${callsiteClass.internalName}.${callsiteMethod.name}${callsiteMethod.desc}"
   }
 
+  final case class ClonedCallsite(callsite: Callsite, clonedWhenInlining: Callsite)
+
   /**
    * Information about invocation arguments, obtained through data flow analysis of the callsite method.
    */
   sealed trait ArgInfo
-  final case class ArgTypeInfo(argType: BType, isPrecise: Boolean, knownNotNull: Boolean) extends ArgInfo
+  case object FunctionLiteral extends ArgInfo
   final case class ForwardedParam(index: Int) extends ArgInfo
+  //  final case class ArgTypeInfo(argType: BType, isPrecise: Boolean, knownNotNull: Boolean) extends ArgInfo
   // can be extended, e.g., with constant types
 
   /**
@@ -221,46 +373,50 @@ class CallGraph[BT <: BTypes](val btypes: BT) {
    *                               virtual calls, an override of the callee might be invoked. Also,
    *                               the callee can be abstract.
    * @param calleeDeclarationClass The class in which the callee is declared
-   * @param safeToInline           True if the callee can be safely inlined: it cannot be overridden,
-   *                               and the inliner settings (project / global) allow inlining it.
-   * @param safeToRewrite          True if the callee is the interface method of a concrete trait method
-   *                               that can be safely re-written to the static implementation method.
+   * @param isStaticallyResolved   True if the callee cannot be overridden
    * @param annotatedInline        True if the callee is annotated @inline
    * @param annotatedNoInline      True if the callee is annotated @noinline
+   * @param samParamTypes          A map from parameter positions to SAM parameter types
    * @param calleeInfoWarning      An inliner warning if some information was not available while
    *                               gathering the information about this callee.
    */
-  final case class Callee(callee: MethodNode, calleeDeclarationClass: ClassBType,
-                          safeToInline: Boolean, safeToRewrite: Boolean,
+  final case class Callee(callee: MethodNode, calleeDeclarationClass: btypes.ClassBType,
+                          isStaticallyResolved: Boolean, sourceFilePath: Option[String],
                           annotatedInline: Boolean, annotatedNoInline: Boolean,
+                          samParamTypes: IntMap[btypes.ClassBType],
                           calleeInfoWarning: Option[CalleeInfoWarning]) {
-    assert(!(safeToInline && safeToRewrite), s"A callee of ${callee.name} can be either safeToInline or safeToRewrite, but not both.")
+    override def toString = s"Callee($calleeDeclarationClass.${callee.name})"
+
+    def canInlineFromSource = inlinerHeuristics.canInlineFromSource(sourceFilePath)
+    def isAbstract = isAbstractMethod(callee)
+    def isSpecialMethod = isConstructor(callee) || isNativeMethod(callee) || hasCallerSensitiveAnnotation(callee)
+
+    def safeToInline = isStaticallyResolved && canInlineFromSource && !isAbstract && !isSpecialMethod
   }
 
-  final case class ClosureInstantiation(lambdaMetaFactoryCall: LambdaMetaFactoryCall, ownerMethod: MethodNode, ownerClass: ClassBType) {
+  /**
+   * Metadata about a closure instantiation, stored in the call graph
+   *
+   * @param lambdaMetaFactoryCall the InvokeDynamic instruction
+   * @param ownerMethod           the method where the closure is allocated
+   * @param ownerClass            the class containing the above method
+   * @param capturedArgInfos      information about captured arguments. Used for updating the call
+   *                              graph when re-writing a closure invocation to the body method.
+   */
+  final case class ClosureInstantiation(lambdaMetaFactoryCall: LambdaMetaFactoryCall, ownerMethod: MethodNode, ownerClass: ClassBType, capturedArgInfos: IntMap[ArgInfo]) {
+    /**
+     * Contains closure instantiations that were created during inlining by cloning this instantiation.
+     */
+    val inlinedClones = mutable.Set.empty[ClosureInstantiation]
     override def toString = s"ClosureInstantiation($lambdaMetaFactoryCall, ${ownerMethod.name + ownerMethod.desc}, $ownerClass)"
   }
   final case class LambdaMetaFactoryCall(indy: InvokeDynamicInsnNode, samMethodType: Type, implMethod: Handle, instantiatedMethodType: Type)
 
   object LambdaMetaFactoryCall {
-    private val lambdaMetaFactoryInternalName: InternalName = "java/lang/invoke/LambdaMetafactory"
-
-    private val metafactoryHandle = {
-      val metafactoryMethodName: String = "metafactory"
-      val metafactoryDesc: String       = "(Ljava/lang/invoke/MethodHandles$Lookup;Ljava/lang/String;Ljava/lang/invoke/MethodType;Ljava/lang/invoke/MethodType;Ljava/lang/invoke/MethodHandle;Ljava/lang/invoke/MethodType;)Ljava/lang/invoke/CallSite;"
-      new Handle(Opcodes.H_INVOKESTATIC, lambdaMetaFactoryInternalName, metafactoryMethodName, metafactoryDesc)
-    }
-
-    private val altMetafactoryHandle = {
-      val altMetafactoryMethodName: String = "altMetafactory"
-      val altMetafactoryDesc: String       = "(Ljava/lang/invoke/MethodHandles$Lookup;Ljava/lang/String;Ljava/lang/invoke/MethodType;[Ljava/lang/Object;)Ljava/lang/invoke/CallSite;"
-      new Handle(Opcodes.H_INVOKESTATIC, lambdaMetaFactoryInternalName, altMetafactoryMethodName, altMetafactoryDesc)
-    }
-
     def unapply(insn: AbstractInsnNode): Option[(InvokeDynamicInsnNode, Type, Handle, Type)] = insn match {
-      case indy: InvokeDynamicInsnNode if indy.bsm == metafactoryHandle || indy.bsm == altMetafactoryHandle =>
+      case indy: InvokeDynamicInsnNode if indy.bsm == coreBTypes.lambdaMetaFactoryMetafactoryHandle || indy.bsm == coreBTypes.lambdaMetaFactoryAltMetafactoryHandle =>
         indy.bsmArgs match {
-          case Array(samMethodType: Type, implMethod: Handle, instantiatedMethodType: Type, xs@_*) => // xs binding because IntelliJ gets confused about _@_*
+          case Array(samMethodType: Type, implMethod: Handle, instantiatedMethodType: Type, _@_*) =>
             // LambdaMetaFactory performs a number of automatic adaptations when invoking the lambda
             // implementation method (casting, boxing, unboxing, and primitive widening, see Javadoc).
             //
@@ -284,7 +440,7 @@ class CallGraph[BT <: BTypes](val btypes: BT) {
             // When re-writing the closure callsite to the implMethod, we have to insert a cast.
             //
             // The check below ensures that
-            //   (1) the implMethod type has the expected singature (captured types plus argument types
+            //   (1) the implMethod type has the expected signature (captured types plus argument types
             //       from instantiatedMethodType)
             //   (2) the receiver of the implMethod matches the first captured type
             //   (3) all parameters that are not the same in samMethodType and instantiatedMethodType
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala
index b0dc6ead1b..2fca8991ab 100644
--- a/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala
+++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala
@@ -8,21 +8,39 @@ package backend.jvm
 package opt
 
 import scala.annotation.switch
-import scala.collection.immutable
+import scala.collection.mutable
+import scala.collection.immutable.IntMap
 import scala.reflect.internal.util.NoPosition
 import scala.tools.asm.{Type, Opcodes}
 import scala.tools.asm.tree._
 import scala.tools.nsc.backend.jvm.BTypes.InternalName
-import scala.tools.nsc.backend.jvm.analysis.ProdConsAnalyzer
 import BytecodeUtils._
 import BackendReporting._
 import Opcodes._
-import scala.tools.nsc.backend.jvm.opt.ByteCodeRepository.CompilationUnit
-import scala.collection.convert.decorateAsScala._
+import scala.collection.JavaConverters._
 
 class ClosureOptimizer[BT <: BTypes](val btypes: BT) {
   import btypes._
   import callGraph._
+  import coreBTypes._
+  import backendUtils._
+  import ClosureOptimizer._
+
+  private object closureInitOrdering extends Ordering[ClosureInstantiation] {
+    override def compare(x: ClosureInstantiation, y: ClosureInstantiation): Int = {
+      val cls = x.ownerClass.internalName compareTo y.ownerClass.internalName
+      if (cls != 0) return cls
+
+      val mName = x.ownerMethod.name compareTo y.ownerMethod.name
+      if (mName != 0) return mName
+
+      val mDesc = x.ownerMethod.desc compareTo y.ownerMethod.desc
+      if (mDesc != 0) return mDesc
+
+      def pos(inst: ClosureInstantiation) = inst.ownerMethod.instructions.indexOf(inst.lambdaMetaFactoryCall.indy)
+      pos(x) - pos(y)
+    }
+  }
 
   /**
    * If a closure is allocated and invoked within the same method, re-write the invocation to the
@@ -54,55 +72,51 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) {
    *   [invoke the closure body method]
    */
   def rewriteClosureApplyInvocations(): Unit = {
-    implicit object closureInitOrdering extends Ordering[ClosureInstantiation] {
-      override def compare(x: ClosureInstantiation, y: ClosureInstantiation): Int = {
-        val cls = x.ownerClass.internalName compareTo y.ownerClass.internalName
-        if (cls != 0) return cls
-
-        val mName = x.ownerMethod.name compareTo y.ownerMethod.name
-        if (mName != 0) return mName
 
-        val mDesc = x.ownerMethod.desc compareTo y.ownerMethod.desc
-        if (mDesc != 0) return mDesc
-
-        def pos(inst: ClosureInstantiation) = inst.ownerMethod.instructions.indexOf(inst.lambdaMetaFactoryCall.indy)
-        pos(x) - pos(y)
-      }
+    // sort all closure invocations to rewrite to ensure bytecode stability
+    val toRewrite = mutable.TreeMap.empty[ClosureInstantiation, mutable.ArrayBuffer[(MethodInsnNode, Int)]](closureInitOrdering)
+    def addRewrite(init: ClosureInstantiation, invocation: MethodInsnNode, stackHeight: Int): Unit = {
+      val callsites = toRewrite.getOrElseUpdate(init, mutable.ArrayBuffer.empty[(MethodInsnNode, Int)])
+      callsites += ((invocation, stackHeight))
     }
 
-    // Grouping the closure instantiations by method allows running the ProdConsAnalyzer only once per
-    // method. Also sort the instantiations: If there are multiple closure instantiations in a method,
-    // closure invocations need to be re-written in a consistent order for bytecode stability. The local
-    // variable slots for storing captured values depends on the order of rewriting.
-    val closureInstantiationsByMethod: Map[MethodNode, immutable.TreeSet[ClosureInstantiation]] = {
-      closureInstantiations.values.groupBy(_.ownerMethod).mapValues(immutable.TreeSet.empty ++ _)
-    }
+    // For each closure instantiation find callsites of the closure and add them to the toRewrite
+    // buffer (cannot change a method's bytecode while still looking for further invocations to
+    // rewrite, the frame indices of the ProdCons analysis would get out of date). If a callsite
+    // cannot be rewritten, for example because the lambda body method is not accessible, issue a
+    // warning. The `toList` in the next line prevents modifying closureInstantiations while
+    // iterating it: minimalRemoveUnreachableCode (called in the loop) removes elements.
+    for (method <- closureInstantiations.keysIterator.toList if AsmAnalyzer.sizeOKForBasicValue(method)) closureInstantiations.get(method) match {
+      case Some(closureInitsBeforeDCE) if closureInitsBeforeDCE.nonEmpty =>
+        val ownerClass = closureInitsBeforeDCE.head._2.ownerClass.internalName
+
+        // Advanced ProdCons queries (initialProducersForValueAt) expect no unreachable code.
+        localOpt.minimalRemoveUnreachableCode(method, ownerClass)
+
+        if (AsmAnalyzer.sizeOKForSourceValue(method)) closureInstantiations.get(method) match {
+          case Some(closureInits) =>
+            // A lazy val to ensure the analysis only runs if necessary (the value is passed by name to `closureCallsites`)
+            lazy val prodCons = new ProdConsAnalyzer(method, ownerClass)
+
+            for (init <- closureInits.valuesIterator) closureCallsites(init, prodCons) foreach {
+              case Left(warning) =>
+                backendReporting.inlinerWarning(warning.pos, warning.toString)
+
+              case Right((invocation, stackHeight)) =>
+                addRewrite(init, invocation, stackHeight)
+            }
+
+          case _ =>
+        }
 
-    // For each closure instantiation, a list of callsites of the closure that can be re-written
-    // If a callsite cannot be rewritten, for example because the lambda body method is not accessible,
-    // a warning is returned instead.
-    val callsitesToRewrite: List[(ClosureInstantiation, List[Either[RewriteClosureApplyToClosureBodyFailed, (MethodInsnNode, Int)]])] = {
-      closureInstantiationsByMethod.iterator.flatMap({
-        case (methodNode, closureInits) =>
-          // A lazy val to ensure the analysis only runs if necessary (the value is passed by name to `closureCallsites`)
-          lazy val prodCons = new ProdConsAnalyzer(methodNode, closureInits.head.ownerClass.internalName)
-          closureInits.iterator.map(init => (init, closureCallsites(init, prodCons)))
-      }).toList // mapping to a list (not a map) to keep the sorting of closureInstantiationsByMethod
+      case _ =>
     }
 
-    // Rewrite all closure callsites (or issue inliner warnings for those that cannot be rewritten)
-    for ((closureInit, callsites) <- callsitesToRewrite) {
+    for ((closureInit, invocations) <- toRewrite) {
       // Local variables that hold the captured values and the closure invocation arguments.
-      // They are lazy vals to ensure that locals for captured values are only allocated if there's
-      // actually a callsite to rewrite (an not only warnings to be issued).
-      lazy val (localsForCapturedValues, argumentLocalsList) = localsForClosureRewrite(closureInit)
-      for (callsite <- callsites) callsite match {
-        case Left(warning) =>
-          backendReporting.inlinerWarning(warning.pos, warning.toString)
-
-        case Right((invocation, stackHeight)) =>
-          rewriteClosureApplyInvocation(closureInit, invocation, stackHeight, localsForCapturedValues, argumentLocalsList)
-      }
+      val (localsForCapturedValues, argumentLocalsList) = localsForClosureRewrite(closureInit)
+      for ((invocation, stackHeight) <- invocations)
+        rewriteClosureApplyInvocation(closureInit, invocation, stackHeight, localsForCapturedValues, argumentLocalsList)
     }
   }
 
@@ -122,20 +136,7 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) {
     val argTypes = closureInit.lambdaMetaFactoryCall.samMethodType.getArgumentTypes
     val firstArgLocal = ownerMethod.maxLocals
 
-    // The comment in the unapply method of `LambdaMetaFactoryCall` explains why we have to introduce
-    // casts for arguments that have different types in samMethodType and instantiatedMethodType.
-    val castLoadTypes = {
-      val instantiatedMethodType = closureInit.lambdaMetaFactoryCall.instantiatedMethodType
-      (argTypes, instantiatedMethodType.getArgumentTypes).zipped map {
-        case (samArgType, instantiatedArgType) if samArgType != instantiatedArgType =>
-          // the LambdaMetaFactoryCall extractor ensures that the two types are reference types,
-          // so we don't end up casting primitive values.
-          Some(instantiatedArgType)
-        case _ =>
-          None
-      }
-    }
-    val argLocals = LocalsList.fromTypes(firstArgLocal, argTypes, castLoadTypes)
+    val argLocals = LocalsList.fromTypes(firstArgLocal, argTypes)
     ownerMethod.maxLocals = firstArgLocal + argLocals.size
 
     (captureLocals, argLocals)
@@ -154,7 +155,7 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) {
         // TODO: This is maybe over-cautious.
         // We are checking if the closure body method is accessible at the closure callsite.
         // If the closure allocation has access to the body method, then the callsite (in the same
-        // method as the alloction) should have access too.
+        // method as the allocation) should have access too.
         val bodyAccessible: Either[OptimizerWarning, Boolean] = for {
           (bodyMethodNode, declClass) <- byteCodeRepository.methodNode(lambdaBodyHandle.getOwner, lambdaBodyHandle.getName, lambdaBodyHandle.getDesc): Either[OptimizerWarning, (MethodNode, InternalName)]
           isAccessible                <- inliner.memberIsAccessible(bodyMethodNode.access, classBTypeFromParsedClassfile(declClass), classBTypeFromParsedClassfile(lambdaBodyHandle.getOwner), ownerClass)
@@ -162,7 +163,7 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) {
           isAccessible
         }
 
-        def pos = callGraph.callsites.get(invocation).map(_.callsitePosition).getOrElse(NoPosition)
+        def pos = callGraph.callsites(ownerMethod).get(invocation).map(_.callsitePosition).getOrElse(NoPosition)
         val stackSize: Either[RewriteClosureApplyToClosureBodyFailed, Int] = bodyAccessible match {
           case Left(w)      => Left(RewriteClosureAccessCheckFailed(pos, w))
           case Right(false) => Left(RewriteClosureIllegalAccess(pos, ownerClass.internalName))
@@ -173,6 +174,28 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) {
     }).toList
   }
 
+  /**
+   * Check whether `invocation` invokes the SAM of the IndyLambda `closureInit`.
+   *
+   * In addition to a perfect match, we also identify cases where a generic FunctionN is created
+   * but the invocation is to a specialized variant apply$sp... Vice-versa, we also allow the
+   * case where a specialized FunctionN$sp.. is created but the generic apply is invoked. In
+   * these cases, the translation will introduce the necessary box / unbox invocations. Example:
+   *
+   *   val f: Int => Any = (x: Int) => 1
+   *   f(10)
+   *
+   * The IndyLambda creates a specialized `JFunction1$mcII$sp`, whose SAM is `apply$mcII$sp(I)I`.
+   * The invocation calls `apply(Object)Object`: the method name and type don't match.
+   * We identify these cases, insert the necessary unbox operation for the arguments, and invoke
+   * the `$anonfun(I)I` method.
+   *
+   * Tests in InlinerTest.optimizeSpecializedClosures. In that test, methods t4/t4a/t5/t8 show
+   * examples where the parameters have to be unboxed because generic `apply` is called, but the
+   * lambda body method takes primitive types.
+   * The opposite case is in t9: the specialized `apply$sp..` is invoked, but the lambda body
+   * method takes boxed arguments, so we have to insert boxing operations.
+   */
   private def isSamInvocation(invocation: MethodInsnNode, closureInit: ClosureInstantiation, prodCons: => ProdConsAnalyzer): Boolean = {
     val indy = closureInit.lambdaMetaFactoryCall.indy
     if (invocation.getOpcode == INVOKESTATIC) false
@@ -187,11 +210,85 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) {
         receiverProducers.size == 1 && receiverProducers.head == indy
       }
 
-      invocation.name == indy.name && {
-        val indySamMethodDesc = closureInit.lambdaMetaFactoryCall.samMethodType.getDescriptor
-        indySamMethodDesc == invocation.desc
-      } &&
-        closureIsReceiver // most expensive check last
+      def isSpecializedVersion(specName: String, nonSpecName: String) = specName.startsWith(nonSpecName) && specializationSuffix.pattern.matcher(specName.substring(nonSpecName.length)).matches
+
+      def sameOrSpecializedType(specTp: Type, nonSpecTp: Type) = {
+        specTp == nonSpecTp || {
+          val specDesc = specTp.getDescriptor
+          val nonSpecDesc = nonSpecTp.getDescriptor
+          specDesc.length == 1 && primitives.contains(specDesc) && nonSpecDesc == ObjectRef.descriptor
+        }
+      }
+
+      def specializedDescMatches(specMethodDesc: String, nonSpecMethodDesc: String) = {
+        val specArgs = Type.getArgumentTypes(specMethodDesc)
+        val nonSpecArgs = Type.getArgumentTypes(nonSpecMethodDesc)
+        specArgs.corresponds(nonSpecArgs)(sameOrSpecializedType) && sameOrSpecializedType(Type.getReturnType(specMethodDesc), Type.getReturnType(nonSpecMethodDesc))
+      }
+
+      def nameAndDescMatch = {
+        val aName = invocation.name
+        val bName = indy.name
+        val aDesc = invocation.desc
+        val bDesc = closureInit.lambdaMetaFactoryCall.samMethodType.getDescriptor
+        if (aName == bName) aDesc == bDesc
+        else if (isSpecializedVersion(aName, bName)) specializedDescMatches(aDesc, bDesc)
+        else if (isSpecializedVersion(bName, aName)) specializedDescMatches(bDesc, aDesc)
+        else false
+      }
+
+      nameAndDescMatch && closureIsReceiver // most expensive check last
+    }
+  }
+
+  private def isPrimitiveType(asmType: Type) = {
+    val sort = asmType.getSort
+    Type.VOID <= sort && sort <= Type.DOUBLE
+  }
+
+  /**
+   * The argument types of the lambda body method may differ in two ways from the argument types of
+   * the closure member method that is invoked (and replaced by a call to the body).
+   *   - The lambda body method may have more specific types than the invoked closure member, see
+   *     comment in [[LambdaMetaFactoryCall.unapply]].
+   *   - The invoked closure member might be a specialized variant of the SAM or vice-versa, see
+   *     comment on method [[isSamInvocation]].
+   */
+  private def adaptStoredArguments(closureInit: ClosureInstantiation, invocation: MethodInsnNode): Int => Option[AbstractInsnNode] = {
+    val invokeDesc = invocation.desc
+    // The lambda body method has additional parameters for captured values. Here we need to consider
+    // only those parameters of the body method that correspond to lambda parameters. This happens
+    // to be exactly LMF.instantiatedMethodType. In fact, `LambdaMetaFactoryCall.unapply` ensures
+    // that the body method signature is exactly (capturedParams + instantiatedMethodType).
+    val lambdaBodyMethodDescWithoutCaptures = closureInit.lambdaMetaFactoryCall.instantiatedMethodType.getDescriptor
+    if (invokeDesc == lambdaBodyMethodDescWithoutCaptures) {
+      _ => None
+    } else {
+      val invokeArgTypes = Type.getArgumentTypes(invokeDesc)
+      val implMethodArgTypes = Type.getArgumentTypes(lambdaBodyMethodDescWithoutCaptures)
+      val res = new Array[Option[AbstractInsnNode]](invokeArgTypes.length)
+      for (i <- invokeArgTypes.indices) {
+        if (invokeArgTypes(i) == implMethodArgTypes(i)) {
+          res(i) = None
+        } else if (isPrimitiveType(implMethodArgTypes(i)) && invokeArgTypes(i).getDescriptor == ObjectRef.descriptor) {
+          res(i) = Some(getScalaUnbox(implMethodArgTypes(i)))
+        } else if (isPrimitiveType(invokeArgTypes(i)) && implMethodArgTypes(i).getDescriptor == ObjectRef.descriptor) {
+          res(i) = Some(getScalaBox(invokeArgTypes(i)))
+        } else {
+          assert(!isPrimitiveType(invokeArgTypes(i)), invokeArgTypes(i))
+          assert(!isPrimitiveType(implMethodArgTypes(i)), implMethodArgTypes(i))
+          // The comment in the unapply method of `LambdaMetaFactoryCall` explains why we have to introduce
+          // casts for arguments that have different types in samMethodType and instantiatedMethodType.
+          //
+          // Note:
+          //   - invokeArgTypes is the same as the argument types in the IndyLambda's samMethodType,
+          //     this is ensured by the `isSamInvocation` filter in this file
+          //   - implMethodArgTypes is the same as the arg types in the IndyLambda's instantiatedMethodType,
+          //     this is ensured by the unapply method in LambdaMetaFactoryCall (file CallGraph)
+          res(i) = Some(new TypeInsnNode(CHECKCAST, implMethodArgTypes(i).getInternalName))
+        }
+      }
+      res
     }
   }
 
@@ -200,7 +297,7 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) {
     val lambdaBodyHandle = closureInit.lambdaMetaFactoryCall.implMethod
 
     // store arguments
-    insertStoreOps(invocation, ownerMethod, argumentLocalsList)
+    insertStoreOps(invocation, ownerMethod, argumentLocalsList, adaptStoredArguments(closureInit, invocation))
 
     // drop the closure from the stack
     ownerMethod.instructions.insertBefore(invocation, new InsnNode(POP))
@@ -210,8 +307,9 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) {
     insertLoadOps(invocation, ownerMethod, argumentLocalsList)
 
     // update maxStack
-    val capturesStackSize = localsForCapturedValues.size
-    val invocationStackHeight = stackHeight + capturesStackSize - 1 // -1 because the closure is gone
+    // One slot per value is correct for long / double, see comment in the `analysis` package object.
+    val numCapturedValues = localsForCapturedValues.locals.length
+    val invocationStackHeight = stackHeight + numCapturedValues - 1 // -1 because the closure is gone
     if (invocationStackHeight > ownerMethod.maxStack)
       ownerMethod.maxStack = invocationStackHeight
 
@@ -227,46 +325,75 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) {
         insns.insertBefore(invocation, new InsnNode(DUP))
         INVOKESPECIAL
     }
-    val isInterface = bodyOpcode == INVOKEINTERFACE
-    val bodyInvocation = new MethodInsnNode(bodyOpcode, lambdaBodyHandle.getOwner, lambdaBodyHandle.getName, lambdaBodyHandle.getDesc, isInterface)
+    val bodyInvocation = new MethodInsnNode(bodyOpcode, lambdaBodyHandle.getOwner, lambdaBodyHandle.getName, lambdaBodyHandle.getDesc, lambdaBodyHandle.isInterface)
     ownerMethod.instructions.insertBefore(invocation, bodyInvocation)
 
-    val returnType = Type.getReturnType(lambdaBodyHandle.getDesc)
-    fixLoadedNothingOrNullValue(returnType, bodyInvocation, ownerMethod, btypes) // see comment of that method
+    val bodyReturnType = Type.getReturnType(lambdaBodyHandle.getDesc)
+    val invocationReturnType = Type.getReturnType(invocation.desc)
+    if (isPrimitiveType(invocationReturnType) && bodyReturnType.getDescriptor == ObjectRef.descriptor) {
+      val op =
+        if (invocationReturnType.getSort == Type.VOID) getPop(1)
+        else getScalaUnbox(invocationReturnType)
+      ownerMethod.instructions.insertBefore(invocation, op)
+    } else if (isPrimitiveType(bodyReturnType) && invocationReturnType.getDescriptor == ObjectRef.descriptor) {
+      val op =
+        if (bodyReturnType.getSort == Type.VOID) getBoxedUnit
+        else getScalaBox(bodyReturnType)
+      ownerMethod.instructions.insertBefore(invocation, op)
+    } else {
+      // see comment of that method
+      fixLoadedNothingOrNullValue(bodyReturnType, bodyInvocation, ownerMethod, btypes)
+    }
 
     ownerMethod.instructions.remove(invocation)
 
     // update the call graph
-    val originalCallsite = callGraph.callsites.remove(invocation)
+    val originalCallsite = callGraph.removeCallsite(invocation, ownerMethod)
 
     // the method node is needed for building the call graph entry
     val bodyMethod = byteCodeRepository.methodNode(lambdaBodyHandle.getOwner, lambdaBodyHandle.getName, lambdaBodyHandle.getDesc)
-    def bodyMethodIsBeingCompiled = byteCodeRepository.classNodeAndSource(lambdaBodyHandle.getOwner).map(_._2 == CompilationUnit).getOrElse(false)
-    val bodyMethodCallsite = Callsite(
-      callsiteInstruction = bodyInvocation,
-      callsiteMethod = ownerMethod,
-      callsiteClass = closureInit.ownerClass,
-      callee = bodyMethod.map({
-        case (bodyMethodNode, bodyMethodDeclClass) => Callee(
+    val sourceFilePath = byteCodeRepository.compilingClasses.get(lambdaBodyHandle.getOwner).map(_._2)
+    val callee = bodyMethod.map({
+      case (bodyMethodNode, bodyMethodDeclClass) =>
+        val bodyDeclClassType = classBTypeFromParsedClassfile(bodyMethodDeclClass)
+        Callee(
           callee = bodyMethodNode,
-          calleeDeclarationClass = classBTypeFromParsedClassfile(bodyMethodDeclClass),
-          safeToInline = compilerSettings.YoptInlineGlobal || bodyMethodIsBeingCompiled,
-          safeToRewrite = false, // the lambda body method is not a trait interface method
+          calleeDeclarationClass = bodyDeclClassType,
+          isStaticallyResolved = true,
+          sourceFilePath = sourceFilePath,
           annotatedInline = false,
           annotatedNoInline = false,
+          samParamTypes = callGraph.samParamTypes(bodyMethodNode, bodyDeclClassType),
           calleeInfoWarning = None)
-      }),
-      argInfos = Nil,
+    })
+    val argInfos = closureInit.capturedArgInfos ++ originalCallsite.map(cs => cs.argInfos map {
+      case (index, info) => (index + numCapturedValues, info)
+    }).getOrElse(IntMap.empty)
+    val bodyMethodCallsite = Callsite(
+      callsiteInstruction = bodyInvocation,
+      callsiteMethod = ownerMethod,
+      callsiteClass = closureInit.ownerClass,
+      callee = callee,
+      argInfos = argInfos,
       callsiteStackHeight = invocationStackHeight,
       receiverKnownNotNull = true, // see below (*)
-      callsitePosition = originalCallsite.map(_.callsitePosition).getOrElse(NoPosition)
+      callsitePosition = originalCallsite.map(_.callsitePosition).getOrElse(NoPosition),
+      annotatedInline = false,
+      annotatedNoInline = false
     )
     // (*) The documentation in class LambdaMetafactory says:
     //     "if implMethod corresponds to an instance method, the first capture argument
     //     (corresponding to the receiver) must be non-null"
     // Explanation: If the lambda body method is non-static, the receiver is a captured
     // value. It can only be captured within some instance method, so we know it's non-null.
-    callGraph.callsites(bodyInvocation) = bodyMethodCallsite
+    callGraph.addCallsite(bodyMethodCallsite)
+
+    // Rewriting a closure invocation may render code unreachable. For example, the body method of
+    // (x: T) => ??? has return type Nothing$, and an ATHROW is added (see fixLoadedNothingOrNullValue).
+    unreachableCodeEliminated -= ownerMethod
+
+    if (hasAdaptedImplMethod(closureInit) && inliner.canInlineCallsite(bodyMethodCallsite).isEmpty)
+      inliner.inlineCallsite(bodyMethodCallsite)
   }
 
   /**
@@ -283,13 +410,10 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) {
     // local. On the other hand, further optimizations (copy propagation, remove unused locals) will
     // clean it up.
 
-    // Captured variables don't need to be cast when loaded at the callsite (castLoadTypes are None).
-    // This is checked in `isClosureInstantiation`: the types of the captured variables in the indy
-    // instruction match exactly the corresponding parameter types in the body method.
-    val localsForCaptures = LocalsList.fromTypes(firstCaptureLocal, capturedTypes, castLoadTypes = _ => None)
+    val localsForCaptures = LocalsList.fromTypes(firstCaptureLocal, capturedTypes)
     closureInit.ownerMethod.maxLocals = firstCaptureLocal + localsForCaptures.size
 
-    insertStoreOps(indy, closureInit.ownerMethod, localsForCaptures)
+    insertStoreOps(indy, closureInit.ownerMethod, localsForCaptures, _ => None)
     insertLoadOps(indy, closureInit.ownerMethod, localsForCaptures)
 
     localsForCaptures
@@ -301,8 +425,16 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) {
    *
    * The lowest stack value is stored in the head of the locals list, so the last local is stored first.
    */
-  private def insertStoreOps(before: AbstractInsnNode, methodNode: MethodNode, localsList: LocalsList) =
-    insertLocalValueOps(before, methodNode, localsList, store = true)
+  private def insertStoreOps(before: AbstractInsnNode, methodNode: MethodNode, localsList: LocalsList, beforeStore: Int => Option[AbstractInsnNode]) = {
+    // The first instruction needs to store into the last local of the `localsList`.
+    // To avoid reversing the list, we use `insert(previous)`.
+    val previous = before.getPrevious
+    def ins(op: AbstractInsnNode) = methodNode.instructions.insert(previous, op)
+    for ((l, i) <- localsList.locals.zipWithIndex) {
+      ins(new VarInsnNode(l.storeOpcode, l.local))
+      beforeStore(i) foreach ins
+    }
+  }
 
   /**
    * Insert load operations in front of the `before` instruction to copy the local values denoted
@@ -310,20 +442,10 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) {
    *
    * The head of the locals list will be the lowest value on the stack, so the first local is loaded first.
    */
-  private def insertLoadOps(before: AbstractInsnNode, methodNode: MethodNode, localsList: LocalsList) =
-    insertLocalValueOps(before, methodNode, localsList, store = false)
-
-  private def insertLocalValueOps(before: AbstractInsnNode, methodNode: MethodNode, localsList: LocalsList, store: Boolean): Unit = {
-    // If `store` is true, the first instruction needs to store into the last local of the `localsList`.
-    // Load instructions on the other hand are emitted in the order of the list.
-    // To avoid reversing the list, we use `insert(previousInstr)` for stores and `insertBefore(before)` for loads.
-    lazy val previous = before.getPrevious
+  private def insertLoadOps(before: AbstractInsnNode, methodNode: MethodNode, localsList: LocalsList) = {
     for (l <- localsList.locals) {
-      val varOp = new VarInsnNode(if (store) l.storeOpcode else l.loadOpcode, l.local)
-      if (store) methodNode.instructions.insert(previous, varOp)
-      else methodNode.instructions.insertBefore(before, varOp)
-      if (!store) for (castType <- l.castLoadedValue)
-        methodNode.instructions.insert(varOp, new TypeInsnNode(CHECKCAST, castType.getInternalName))
+      val op = new VarInsnNode(l.loadOpcode, l.local)
+      methodNode.instructions.insertBefore(before, op)
     }
   }
 
@@ -345,12 +467,12 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) {
      *   Local(6, refOpOffset)  ::
      *   Nil
      */
-    def fromTypes(firstLocal: Int, types: Array[Type], castLoadTypes: Int => Option[Type]): LocalsList = {
+    def fromTypes(firstLocal: Int, types: Array[Type]): LocalsList = {
       var sizeTwoOffset = 0
       val locals: List[Local] = types.indices.map(i => {
         // The ASM method `type.getOpcode` returns the opcode for operating on a value of `type`.
         val offset = types(i).getOpcode(ILOAD) - ILOAD
-        val local = Local(firstLocal + i + sizeTwoOffset, offset, castLoadTypes(i))
+        val local = Local(firstLocal + i + sizeTwoOffset, offset)
         if (local.size == 2) sizeTwoOffset += 1
         local
       })(collection.breakOut)
@@ -364,10 +486,15 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) {
    * The xLOAD / xSTORE opcodes are in the following sequence: I, L, F, D, A, so the offset for
    * a local variable holding a reference (`A`) is 4. See also method `getOpcode` in [[scala.tools.asm.Type]].
    */
-  case class Local(local: Int, opcodeOffset: Int, castLoadedValue: Option[Type]) {
+  case class Local(local: Int, opcodeOffset: Int) {
     def size = if (loadOpcode == LLOAD || loadOpcode == DLOAD) 2  else 1
 
     def loadOpcode = ILOAD + opcodeOffset
     def storeOpcode = ISTORE + opcodeOffset
   }
 }
+
+object ClosureOptimizer {
+  val primitives = "BSIJCFDZV"
+  val specializationSuffix = s"(\\$$mc[$primitives]+\\$$sp)".r
+}
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/CopyProp.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/CopyProp.scala
new file mode 100644
index 0000000000..518646812e
--- /dev/null
+++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/CopyProp.scala
@@ -0,0 +1,635 @@
+/* NSC -- new Scala compiler
+ * Copyright 2005-2014 LAMP/EPFL
+ * @author  Martin Odersky
+ */
+
+package scala.tools.nsc
+package backend.jvm
+package opt
+
+import scala.annotation.{switch, tailrec}
+import scala.tools.asm.tree.analysis.BasicInterpreter
+import scala.tools.asm.Type
+import scala.tools.asm.Opcodes._
+import scala.tools.asm.tree._
+import scala.collection.mutable
+import scala.collection.JavaConverters._
+import scala.tools.nsc.backend.jvm.BTypes.InternalName
+import scala.tools.nsc.backend.jvm.analysis._
+import scala.tools.nsc.backend.jvm.opt.BytecodeUtils._
+
+class CopyProp[BT <: BTypes](val btypes: BT) {
+  import btypes._
+  import backendUtils._
+
+
+  /**
+   * For every `xLOAD n`, find all local variable slots that are aliases of `n` using an
+   * AliasingAnalyzer and change the instruction to `xLOAD m` where `m` is the smallest alias.
+   * This leaves behind potentially stale `xSTORE n` instructions, which are then eliminated
+   * by [[eliminateStaleStores]].
+   */
+  def copyPropagation(method: MethodNode, owner: InternalName): Boolean = {
+    AsmAnalyzer.sizeOKForAliasing(method) && {
+      var changed = false // set only when a load is actually redirected to a different slot
+      val numParams = parametersSize(method)
+      lazy val aliasAnalysis = new AsmAnalyzer(method, owner, new AliasingAnalyzer(new BasicInterpreter))
+
+      // Remember locals that are used in a `LOAD` instruction. Assume a program has two LOADs:
+      //
+      //   ...
+      //   LOAD 3  // aliases of 3 here: <3>
+      //   ...
+      //   LOAD 1  // aliases of 1 here: <1, 3>
+      //
+      // In this example, we should change the second load from 1 to 3, which might render the
+      // local variable 1 unused.
+      val knownUsed = new Array[Boolean](method.maxLocals)
+
+      // Returns `init` if it is already known to be used, otherwise the first alias that is known
+      // to be used, otherwise the smallest slot among `init` and its aliases. Never returns -1.
+      def usedOrMinAlias(it: IntIterator, init: Int): Int = {
+        if (knownUsed(init)) init
+        else {
+          var r = init
+          while (it.hasNext) {
+            val n = it.next()
+            // knownUsed.length is the number of locals, `n` may be a stack slot
+            if (n < knownUsed.length && knownUsed(n)) return n
+            if (n < r) r = n
+          }
+          r
+        }
+      }
+      val it = method.instructions.iterator
+      while (it.hasNext) it.next() match {
+        case vi: VarInsnNode if vi.`var` >= numParams && isLoad(vi) =>
+          val aliases = aliasAnalysis.frameAt(vi).asInstanceOf[AliasingFrame[_]].aliasesOf(vi.`var`)
+          if (aliases.size > 1) {
+            val alias = usedOrMinAlias(aliases.iterator, vi.`var`)
+            if (alias != vi.`var`) { // the chosen alias may be the load's own slot: then nothing changes
+              changed = true
+              vi.`var` = alias
+            }
+          }
+          knownUsed(vi.`var`) = true
+
+        case _ =>
+      }
+
+      changed
+    }
+  }
+
+  /**
+   * Eliminate `xSTORE` instructions that have no consumer. If the instruction can be completely
+   * eliminated, it is replaced by a POP. The [[eliminatePushPop]] cleans up unnecessary POPs.
+   *
+   * Note that an `ASTORE` can not always be eliminated: it removes a reference to the object that
+   * is currently stored in that local, which potentially frees it for GC (SI-5313). Therefore
+   * we replace such stores by `POP; ACONST_NULL; ASTORE x`.
+   */
+  def eliminateStaleStores(method: MethodNode, owner: InternalName): Boolean = {
+    AsmAnalyzer.sizeOKForSourceValue(method) && {
+      lazy val prodCons = new ProdConsAnalyzer(method, owner)
+      def hasNoCons(varIns: AbstractInsnNode, slot: Int) = prodCons.consumersOfValueAt(varIns.getNext, slot).isEmpty
+
+      // insns to delete: IINC that have no consumer
+      val toDelete = mutable.ArrayBuffer.empty[IincInsnNode]
+
+      // xSTORE insns to be replaced by POP or POP2
+      val storesToDrop = mutable.ArrayBuffer.empty[VarInsnNode]
+
+      // ASTORE insn that have no consumer.
+      //   - if the local is not live, the store is replaced by POP
+      //   - otherwise, pop the argument value and store NULL instead. Unless the boolean field is
+      //     `true`: then the store argument is already known to be ACONST_NULL.
+      val toNullOut = mutable.ArrayBuffer.empty[(VarInsnNode, Boolean)]
+
+      // `true` for variables that are known to be live
+      val liveVars = new Array[Boolean](method.maxLocals)
+
+      val it = method.instructions.iterator
+      while (it.hasNext) it.next() match {
+        case vi: VarInsnNode if isStore(vi) && hasNoCons(vi, vi.`var`) =>
+          val canElim = vi.getOpcode != ASTORE || {
+            val currentFieldValueProds = prodCons.initialProducersForValueAt(vi, vi.`var`)
+            currentFieldValueProds.size == 1 && (currentFieldValueProds.head match {
+              case ParameterProducer(0) => !isStaticMethod(method) // current value of the local is `this`, which won't be gc'd anyway
+              case _: UninitializedLocalProducer => true // local is not yet initialized, so current value cannot leak
+              case _ => false
+            })
+          }
+          if (canElim) storesToDrop += vi
+          else {
+            val prods = prodCons.producersForValueAt(vi, prodCons.frameAt(vi).stackTop)
+            val isStoreNull = prods.size == 1 && prods.head.getOpcode == ACONST_NULL
+            toNullOut += ((vi, isStoreNull))
+          }
+
+        case ii: IincInsnNode if hasNoCons(ii, ii.`var`) =>
+          toDelete += ii
+
+        case vi: VarInsnNode =>
+          liveVars(vi.`var`) = true
+
+        case ii: IincInsnNode =>
+          liveVars(ii.`var`) = true
+
+        case _ =>
+      }
+
+      def replaceByPop(vi: VarInsnNode): Unit = {
+        val size = if (isSize2LoadOrStore(vi.getOpcode)) 2 else 1
+        method.instructions.set(vi, getPop(size))
+      }
+
+      toDelete foreach method.instructions.remove
+
+      storesToDrop foreach replaceByPop
+
+      var changed = toDelete.nonEmpty || storesToDrop.nonEmpty // a live `ACONST_NULL; ASTORE x` is kept as-is and is not a change
+      for ((vi, isStoreNull) <- toNullOut) {
+        if (!liveVars(vi.`var`)) { replaceByPop(vi); changed = true } // can drop `ASTORE x` where x has only dead stores
+        else if (!isStoreNull) {
+          val prev = vi.getPrevious
+          method.instructions.insert(prev, new InsnNode(ACONST_NULL))
+          method.instructions.insert(prev, getPop(1)) // inserted after `prev` too, so the result is `POP; ACONST_NULL; ASTORE x`
+          changed = true
+        }
+      }
+
+      changed
+    }
+  }
+
+  /**
+   * When a POP instruction has a single producer, remove the POP and eliminate the producer by
+   * bubbling up the POPs. For example, given
+   *   ILOAD 1; ILOAD 2; IADD; POP
+   * we first eliminate the POP, then the IADD, then its inputs, so the entire sequence goes away.
+   * If a producer cannot be eliminated (need to keep side-effects), a POP is inserted.
+   *
+   * A special case eliminates the creation of unused objects with side-effect-free constructors:
+   *   NEW scala/Tuple1; DUP; ALOAD 0; INVOKESPECIAL scala/Tuple1.<init>; POP
+   * The POP has a single producer (the DUP), it's easy to eliminate these two. A special case
+   * is needed to eliminate the INVOKESPECIAL and NEW.
+   */
+  def eliminatePushPop(method: MethodNode, owner: InternalName): Boolean = {
+    AsmAnalyzer.sizeOKForSourceValue(method) && {
+      // A queue of instructions producing a value that has to be eliminated. If possible, the
+      // instruction (and its inputs) will be removed, otherwise a POP is inserted after
+      val queue = mutable.Queue.empty[ProducedValue]
+      // Contains constructor invocations for values that can be eliminated if unused.
+      val sideEffectFreeConstructorCalls = mutable.ArrayBuffer.empty[MethodInsnNode]
+
+      // instructions to remove (we don't change the bytecode while analyzing it. this allows
+      // running the ProdConsAnalyzer only once.)
+      val toRemove = mutable.Set.empty[AbstractInsnNode]
+      // instructions to insert before some instruction
+      val toInsertBefore = mutable.Map.empty[AbstractInsnNode, List[InsnNode]]
+      // an instruction to insert after some instruction
+      val toInsertAfter = mutable.Map.empty[AbstractInsnNode, AbstractInsnNode]
+
+      lazy val prodCons = new ProdConsAnalyzer(method, owner)
+
+      /**
+       * Returns the producers for the stack value `inputSlot` consumed by `cons`, if the consumer
+       * instruction is the only consumer for all of these producers.
+       *
+       * If a producer has multiple consumers, or the value is the caught exception in a catch
+       * block, this method returns Set.empty.
+       */
+      def producersIfSingleConsumer(cons: AbstractInsnNode, inputSlot: Int): Set[AbstractInsnNode] = {
+        /**
+         * True if the values produced by `prod` are all the same. Most instructions produce a single
+         * value. DUP and DUP2 (with a size-2 input) produce two equivalent values. However, there
+         * are some exotic instructions that produce multiple non-equal values (DUP_X1, SWAP, ...).
+         *
+         * Assume we have `DUP_X2; POP`. In order to remove the `POP` we need to change the DUP_X2
+         * into something else, which is not straightforward.
+         *
+         * Since scalac never emits any of those exotic bytecodes, we don't optimize them.
+         */
+        def producerHasSingleOutput(prod: AbstractInsnNode): Boolean = prod match {
+          case _: ExceptionProducer[_] | _: UninitializedLocalProducer =>
+            // POP of an exception in a catch block cannot be removed. For an uninitialized local,
+            // there should not be a consumer. We are conservative and include it here, so the
+            // producer would not be removed.
+            false
+
+          case _: ParameterProducer =>
+            true
+
+          case _ => (prod.getOpcode: @switch) match {
+            case DUP => true
+            case DUP2 => prodCons.frameAt(prod).peekStack(0).getSize == 2
+            case _ => InstructionStackEffect.prod(InstructionStackEffect.forAsmAnalysis(prod, prodCons.frameAt(prod))) == 1
+          }
+        }
+
+        val prods = prodCons.producersForValueAt(cons, inputSlot)
+        val singleConsumer = prods forall { prod =>
+          producerHasSingleOutput(prod) && {
+            // for DUP / DUP2, we only consider the value that is actually consumed by cons
+            val conss = prodCons.consumersOfValueAt(prod.getNext, inputSlot)
+            conss.size == 1 && conss.head == cons
+          }
+        }
+        if (singleConsumer) prods else Set.empty
+      }
+
+      /**
+       * For a POP instruction that is the single consumer of its producers, remove the POP and
+       * enqueue the producers.
+       */
+      def handleInitialPop(pop: AbstractInsnNode): Unit = {
+        val prods = producersIfSingleConsumer(pop, prodCons.frameAt(pop).stackTop)
+        if (prods.nonEmpty) {
+          toRemove += pop
+          val size = if (pop.getOpcode == POP2) 2 else 1
+          queue ++= prods.map(ProducedValue(_, size))
+        }
+      }
+
+      /**
+       * Traverse the method in its initial state and collect all POP instructions and side-effect
+       * free constructor invocations that can be eliminated.
+       */
+      def collectInitialPopsAndPureConstrs(): Unit = {
+        val it = method.instructions.iterator
+        while (it.hasNext) {
+          val insn = it.next()
+          (insn.getOpcode: @switch) match {
+            case POP | POP2 =>
+              handleInitialPop(insn)
+
+            case INVOKESPECIAL =>
+              val mi = insn.asInstanceOf[MethodInsnNode]
+              if (isSideEffectFreeConstructorCall(mi)) sideEffectFreeConstructorCalls += mi
+
+            case _ =>
+          }
+        }
+      }
+
+      /**
+       * Eliminate the `numArgs` inputs of the instruction `prod` (which was eliminated). For
+       * each input value
+       *   - if the `prod` instruction is the single consumer, enqueue the producers of the input
+       *   - otherwise, insert a POP instruction to POP the input value
+       */
+      def handleInputs(prod: AbstractInsnNode, numArgs: Int): Unit = {
+        val frame = prodCons.frameAt(prod)
+        val pops = mutable.ListBuffer.empty[InsnNode]
+        @tailrec def handle(stackOffset: Int): Unit = {
+          if (stackOffset >= 0) {
+            val prods = producersIfSingleConsumer(prod, frame.stackTop - stackOffset)
+            val nSize = frame.peekStack(stackOffset).getSize
+            if (prods.isEmpty) pops append getPop(nSize)
+            else queue ++= prods.map(ProducedValue(_, nSize))
+            handle(stackOffset - 1)
+          }
+        }
+        handle(numArgs - 1) // handle stack offsets (numArgs - 1) to 0
+        if (pops.nonEmpty) toInsertBefore(prod) = pops.toList
+      }
+
+      /**
+       * Eliminate LMF `indy` and its inputs.
+       */
+      def handleClosureInst(indy: InvokeDynamicInsnNode): Unit = {
+        toRemove += indy
+        callGraph.removeClosureInstantiation(indy, method)
+        handleInputs(indy, Type.getArgumentTypes(indy.desc).length)
+      }
+
+      def runQueue(): Unit = while (queue.nonEmpty) {
+        val ProducedValue(prod, size) = queue.dequeue()
+
+        def prodString = s"Producer ${AsmUtils textify prod}@${method.instructions.indexOf(prod)}\n${AsmUtils textify method}"
+        def popAfterProd(): Unit = toInsertAfter(prod) = getPop(size) // producer is kept, its unused value is popped
+
+        (prod.getOpcode: @switch) match {
+          case ACONST_NULL | ICONST_M1 | ICONST_0 | ICONST_1 | ICONST_2 | ICONST_3 | ICONST_4 | ICONST_5 | LCONST_0 | LCONST_1 | FCONST_0 | FCONST_1 | FCONST_2 | DCONST_0 | DCONST_1 |
+               BIPUSH | SIPUSH | ILOAD | LLOAD | FLOAD | DLOAD | ALOAD =>
+            toRemove += prod
+
+          case opc @ (DUP | DUP2) =>
+            assert(opc != DUP2 || size == 2, s"DUP2 for two size-1 values; $prodString") // ensured in method `producerHasSingleOutput`
+            if (toRemove(prod))
+              // the DUP is already scheduled for removal because one of its consumers is a POP.
+              // now the second consumer is also a POP, so we need to eliminate the DUP's input.
+              handleInputs(prod, 1)
+            else
+              toRemove += prod
+
+          case DUP_X1 | DUP_X2 | DUP2_X1 | DUP2_X2 | SWAP =>
+            // these are excluded in method `producerHasSingleOutput`
+            assert(false, s"Cannot eliminate value pushed by an instruction with multiple output values; $prodString")
+
+          case IDIV | LDIV | IREM | LREM =>
+            popAfterProd() // keep potential division by zero
+
+          case IADD | LADD | FADD | DADD | ISUB | LSUB | FSUB | DSUB | IMUL | LMUL | FMUL | DMUL | FDIV | DDIV | FREM | DREM |
+               LSHL | LSHR | LUSHR |
+               IAND | IOR | IXOR | LAND | LOR | LXOR |
+               LCMP | FCMPL | FCMPG | DCMPL | DCMPG =>
+            toRemove += prod
+            handleInputs(prod, 2)
+
+          case INEG | LNEG | FNEG | DNEG |
+               I2L | I2F | I2D | L2I | L2F | L2D | F2I | F2L | F2D | D2I | D2L | D2F | I2B | I2C | I2S =>
+            toRemove += prod
+            handleInputs(prod, 1)
+
+          case GETFIELD | GETSTATIC =>
+            // TODO eliminate side-effect free module loads (https://github.com/scala/scala-dev/issues/16)
+            if (isBoxedUnit(prod)) toRemove += prod
+            else popAfterProd() // keep potential class initialization (static field) or NPE (instance field)
+
+          case INVOKEVIRTUAL | INVOKESPECIAL | INVOKESTATIC | INVOKEINTERFACE =>
+            val methodInsn = prod.asInstanceOf[MethodInsnNode]
+            if (isSideEffectFreeCall(methodInsn)) {
+              toRemove += prod
+              callGraph.removeCallsite(methodInsn, method)
+              val receiver = if (methodInsn.getOpcode == INVOKESTATIC) 0 else 1
+              handleInputs(prod, Type.getArgumentTypes(methodInsn.desc).length + receiver)
+            } else
+              popAfterProd()
+
+          case INVOKEDYNAMIC =>
+            prod match {
+              case callGraph.LambdaMetaFactoryCall(indy, _, _, _) => handleClosureInst(indy)
+              case _ => popAfterProd()
+            }
+
+          case NEW =>
+            if (isNewForSideEffectFreeConstructor(prod)) toRemove += prod
+            else popAfterProd()
+
+          case LDC => prod.asInstanceOf[LdcInsnNode].cst match {
+            case _: java.lang.Integer | _: java.lang.Float | _: java.lang.Long | _: java.lang.Double | _: String =>
+              toRemove += prod
+
+            case _ =>
+              // don't remove class literals, method types, method handles: keep a potential NoClassDefFoundError
+              popAfterProd()
+          }
+
+          case MULTIANEWARRAY =>
+            toRemove += prod
+            handleInputs(prod, prod.asInstanceOf[MultiANewArrayInsnNode].dims)
+
+          case _ =>
+            popAfterProd()
+        }
+      }
+
+      // there are two cases when we can eliminate a constructor call:
+      //   - NEW T; INVOKESPECIAL T.<init> (there's no DUP, the new object is consumed only by the constructor)
+      //   - NEW T; DUP; INVOKESPECIAL T.<init>, where the DUP will be removed
+      def eliminateUnusedPureConstructorCalls(): Boolean = {
+        var changed = false
+
+        def removeConstructorCall(mi: MethodInsnNode): Unit = {
+          toRemove += mi
+          callGraph.removeCallsite(mi, method)
+          sideEffectFreeConstructorCalls -= mi
+          changed = true
+        }
+
+        for (mi <- sideEffectFreeConstructorCalls.toList) { // toList to allow removing elements while traversing
+        val frame = prodCons.frameAt(mi)
+          val stackTop = frame.stackTop
+          val numArgs = Type.getArgumentTypes(mi.desc).length
+          val receiverProds = producersIfSingleConsumer(mi, stackTop - numArgs)
+          if (receiverProds.size == 1) {
+            val receiverProd = receiverProds.head
+            if (receiverProd.getOpcode == NEW) {
+              removeConstructorCall(mi)
+              handleInputs(mi, numArgs + 1) // removes the producers of args and receiver
+            } else if (receiverProd.getOpcode == DUP && toRemove.contains(receiverProd)) {
+              val dupProds = producersIfSingleConsumer(receiverProd, prodCons.frameAt(receiverProd).stackTop)
+              if (dupProds.size == 1 && dupProds.head.getOpcode == NEW) {
+                removeConstructorCall(mi)
+                handleInputs(mi, numArgs) // removes the producers of args. the producer of the receiver is DUP and already in toRemove.
+                queue += ProducedValue(dupProds.head, 1) // removes the NEW (which is NOT the producer of the receiver!)
+              }
+            }
+          }
+        }
+        changed
+      }
+
+      collectInitialPopsAndPureConstrs()
+
+      // eliminating producers enables eliminating unused constructor calls (when a DUP gets removed).
+      // vice-versa, eliminating a constructor call adds producers of constructor parameters to the queue.
+      // so the two run in a loop.
+      runQueue()
+      while (eliminateUnusedPureConstructorCalls())
+        runQueue()
+
+      var changed = false
+      toInsertAfter foreach {
+        case (target, insn) =>
+          nextExecutableInstructionOrLabel(target) match {
+            // `insn` is of type `InsnNode`, so we only need to check the Opcode when comparing to another instruction
+            case Some(next) if next.getOpcode == insn.getOpcode && toRemove(next) =>
+              // Inserting and removing a POP at the same place should not enable `changed`. This happens
+              // when a POP directly follows a producer that cannot be eliminated, e.g. INVOKESTATIC A.m ()I; POP
+              // The POP is initially added to `toRemove`, and the `INVOKESTATIC` producer is added to the queue.
+              // Because the producer cannot be elided, a POP is added to `toInsertAfter`.
+              toRemove -= next
+
+            case _ =>
+              changed = true
+              method.instructions.insert(target, insn)
+          }
+      }
+      toInsertBefore foreach {
+        case (target, insns) =>
+          changed = true
+          insns.foreach(method.instructions.insertBefore(target, _))
+      }
+      toRemove foreach { insn =>
+        changed = true
+        method.instructions.remove(insn)
+      }
+      changed
+    }
+  }
+
+  /**
+   * A value identified by the instruction that produces it.
+   *
+   * @param producer the instruction producing the value
+   * @param size     size associated with the produced value (presumably its number of stack
+   *                 slots -- TODO confirm against the uses of `ProducedValue`)
+   */
+  case class ProducedValue(producer: AbstractInsnNode, size: Int) {
+    // only the producer is shown; `size` is omitted from the textual form
+    override def toString = "<" + AsmUtils.textify(producer) + ">"
+  }
+
+  /**
+   * Remove `xSTORE n; xLOAD n` pairs if
+   *   - the local variable n is not used anywhere else in the method (1), and
+   *   - there are no executable instructions and no live labels (jump targets) between the two (2)
+   *
+   * Note: store-load pairs that cannot be eliminated could be replaced by `DUP; xSTORE n`, but
+   * that's just cosmetic and doesn't help for anything.
+   *
+   * (1) This could be made more precise by running a prodCons analysis and checking that the load
+   * is the only user of the store. Then we could eliminate the pair even if the variable is live
+   * (except for ASTORE, SI-5313). Not needing an analyzer is more efficient, and catches most
+   * cases.
+   *
+   * (2) The implementation uses a conservative estimation for liveness (if some instruction uses
+   * local n, then n is considered live in the entire method). In return, it doesn't need to run an
+   * Analyzer on the method, making it more efficient.
+   *
+   * This method also removes `ACONST_NULL; ASTORE n` if the local n is not live. This pattern is
+   * introduced by [[eliminateStaleStores]].
+   *
+   * The implementation is a little tricky to support the following case:
+   *   ISTORE 1; ISTORE 2; ILOAD 2; ACONST_NULL; ASTORE 3; ILOAD 1
+   * The outer store-load pair can be removed if the two inner pairs can be.
+   */
+  def eliminateStoreLoad(method: MethodNode): Boolean = {
+    // candidate pairs collected while scanning the method; unsafe candidates are elided further down
+    val removePairs = mutable.Set.empty[RemovePair]
+    // liveVars(n) becomes true once local n is used by an instruction that could not be paired,
+    // or when a pair storing to n is elided
+    val liveVars = new Array[Boolean](method.maxLocals)
+    // labels that are the target of a jump or switch instruction
+    val liveLabels = mutable.Set.empty[LabelNode]
+
+    // Creates a RemovePair and records it in `removePairs`.
+    def mkRemovePair(store: VarInsnNode, other: AbstractInsnNode, depends: List[RemovePairDependency]): RemovePair = {
+      val r = RemovePair(store, other, depends)
+      removePairs += r
+      r
+    }
+
+    // Marks the local variable accessed by `insn`, or the labels targeted by `insn`, as live.
+    // Instructions of any other kind are ignored.
+    def registerLiveVarsLabels(insn: AbstractInsnNode): Unit = insn match {
+      case vi: VarInsnNode => liveVars(vi.`var`) = true
+      case ii: IincInsnNode => liveVars(ii.`var`) = true
+      case j: JumpInsnNode => liveLabels += j.label
+      case s: TableSwitchInsnNode => liveLabels += s.dflt; liveLabels ++= s.labels.asScala
+      case s: LookupSwitchInsnNode => liveLabels += s.dflt; liveLabels ++= s.labels.asScala
+      case _ =>
+    }
+
+    // Stack of instructions that may start a removable pair (stores and ACONST_NULL). Each entry
+    // carries the dependencies (inner pairs, labels) that were collected while it was on top.
+    val pairStartStack = new mutable.Stack[(AbstractInsnNode, mutable.ListBuffer[RemovePairDependency])]
+
+    // Starts a new potential pair with an empty dependency list.
+    def push(insn: AbstractInsnNode) = {
+      pairStartStack push ((insn, mutable.ListBuffer.empty))
+    }
+
+    // Adds `dependency` to the entry on top of the stack; does nothing if the stack is empty.
+    def addDepends(dependency: RemovePairDependency) = if (pairStartStack.nonEmpty) {
+      val (_, depends) = pairStartStack.top
+      depends += dependency
+    }
+
+    // True if `load` is a load instruction and the stack top is a VarInsnNode for the same local
+    // slot. Only the slot numbers are compared here.
+    def completesStackTop(load: AbstractInsnNode) = isLoad(load) && pairStartStack.nonEmpty && {
+      pairStartStack.top match {
+        case (store: VarInsnNode, _) => store.`var` == load.asInstanceOf[VarInsnNode].`var`
+        case _ => false
+      }
+    }
+
+    /**
+     * Try to pair `insn` with its correspondent on the stack
+     *   - if the stack top is a store and `insn` is a corresponding load, create a pair
+     *   - otherwise, check the two top stack values for `null; store`. if it matches, create
+     *     a pair and continue pairing `insn` on the remaining stack
+     *   - otherwise, empty the stack and mark the local variables in it live
+     */
+    def tryToPairInstruction(insn: AbstractInsnNode): Unit = {
+      // pops all remaining entries, marking the locals / labels of the popped instructions live
+      @tailrec def emptyStack(): Unit = if (pairStartStack.nonEmpty) {
+        registerLiveVarsLabels(pairStartStack.pop()._1)
+        emptyStack()
+      }
+
+      @tailrec def tryPairing(): Unit = {
+        if (completesStackTop(insn)) {
+          // store-load pair: the new pair becomes a dependency of the enclosing stack entry (if any)
+          val (store: VarInsnNode, depends) = pairStartStack.pop()
+          addDepends(mkRemovePair(store, insn, depends.toList))
+        } else if (pairStartStack.nonEmpty) {
+          val (top, topDepends) = pairStartStack.pop()
+          if (pairStartStack.nonEmpty) {
+            (pairStartStack.top, top) match {
+              case ((ldNull: InsnNode, depends), store: VarInsnNode) if ldNull.getOpcode == ACONST_NULL && store.getOpcode == ASTORE =>
+                pairStartStack.pop()
+                addDepends(mkRemovePair(store, ldNull, depends.toList))
+                // example: store; (null; store;) (store; load;) load
+                //                         s1^     ^^^^^p1^^^^^        // p1 is added to s1's depends
+                // then:    store; (null; store;) load
+                //           s2^    ^^^^p2^^^^^                        // p1 and p2 are added to s2's depends
+                topDepends foreach addDepends
+                tryPairing()
+
+              case _ =>
+                // empty the stack - a non-matching insn was found, cannot create any pairs to remove
+                registerLiveVarsLabels(insn)
+                registerLiveVarsLabels(top)
+                emptyStack()
+            }
+          } else {
+            // stack only has one element
+            registerLiveVarsLabels(insn)
+            registerLiveVarsLabels(top)
+          }
+        } else {
+          // stack is empty already
+          registerLiveVarsLabels(insn)
+        }
+      }
+
+      tryPairing()
+    }
+
+
+    // cursor for the scan over the instruction list below
+    var insn = method.instructions.getFirst
+
+    // moves `insn` forward to the next executable instruction or label, or to null at the end
+    @tailrec def advanceToNextExecutableOrLabel(): Unit = {
+      insn = insn.getNext
+      if (insn != null && !isExecutable(insn) && !insn.isInstanceOf[LabelNode]) advanceToNextExecutableOrLabel()
+    }
+
+    // Scan: ACONST_NULL and stores open a potential pair, a label seen while a pair is open becomes
+    // a dependency of that pair, every other instruction is matched against the open pairs.
+    while (insn != null) {
+      insn match {
+        case _ if insn.getOpcode == ACONST_NULL          => push(insn)
+        case vi: VarInsnNode if isStore(vi)              => push(insn)
+        case label: LabelNode if pairStartStack.nonEmpty => addDepends(LabelNotLive(label))
+        case _                                           => tryToPairInstruction(insn)
+      }
+      advanceToNextExecutableOrLabel()
+    }
+
+    // elide RemovePairs that depend on live labels or other RemovePair that have to be elided.
+    // example:  store 1; store 2; label x; load 2; load 1
+    // if x is live, the inner pair has to be elided, causing the outer pair to be elided too.
+
+    var doneEliding = false
+
+    // Drops `removePair` from the candidates. Its local becomes live, and another pass is requested
+    // because other candidates may depend on this pair or store to the same local.
+    def elide(removePair: RemovePair) = {
+      doneEliding = false
+      liveVars(removePair.store.`var`) = true
+      removePairs -= removePair
+    }
+
+    // iterate until a full pass over the remaining candidates elides nothing
+    while (!doneEliding) {
+      doneEliding = true
+      for (removePair <- removePairs.toList) {
+        val slot = removePair.store.`var`
+        if (liveVars(slot)) elide(removePair)
+        else removePair.depends foreach {
+          case LabelNotLive(label) => if (liveLabels(label)) elide(removePair)
+          case other: RemovePair => if (!removePairs(other)) elide(removePair)
+        }
+      }
+    }
+
+    // remove both instructions of every surviving pair from the method
+    for (removePair <- removePairs) {
+      method.instructions.remove(removePair.store)
+      method.instructions.remove(removePair.other)
+    }
+
+    // true if at least one pair was removed
+    removePairs.nonEmpty
+  }
+}
+
+/**
+ * A condition that has to hold for a [[RemovePair]] to be removable: either another pair that
+ * needs to be removable as well, or a label that must not be live ([[LabelNotLive]]).
+ *
+ * The hierarchy is sealed so that pattern matches over dependencies are checked for exhaustiveness.
+ */
+sealed trait RemovePairDependency
+
+/**
+ * A pair of instructions that is a candidate for removal.
+ *
+ * @param store   the store instruction of the pair
+ * @param other   the instruction paired with `store` (a load of the same local, or the
+ *                `ACONST_NULL` preceding an `ASTORE`)
+ * @param depends the dependencies that all need to be satisfied for this pair to be removed
+ */
+final case class RemovePair(store: VarInsnNode, other: AbstractInsnNode, depends: List[RemovePairDependency]) extends RemovePairDependency {
+  override def toString = s"<${AsmUtils textify store},${AsmUtils textify other}> [$depends]"
+}
+
+/** Dependency on `label` not being live, i.e. not being the target of a jump or switch. */
+final case class LabelNotLive(label: LabelNode) extends RemovePairDependency
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/InlineInfoAttribute.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/InlineInfoAttribute.scala
index e7dd5abc57..7bc4ea2392 100644
--- a/src/compiler/scala/tools/nsc/backend/jvm/opt/InlineInfoAttribute.scala
+++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/InlineInfoAttribute.scala
@@ -27,7 +27,7 @@ import scala.tools.nsc.backend.jvm.BackendReporting.UnknownScalaInlineInfoVersio
  * In principle we could encode the InlineInfo into a Java annotation (instead of a classfile attribute).
  * However, an attribute allows us to save many bits. In particular, note that the strings in an
  * InlineInfo are serialized as references to constants in the constant pool, and those strings
- * (traitImplClassSelfType, method names, method signatures) would exist in there anyway. So the
+ * (method names, method signatures) would exist in there anyway. So the
  * ScalaInlineAttribute remains relatively compact.
  */
 case class InlineInfoAttribute(inlineInfo: InlineInfo) extends Attribute(InlineInfoAttribute.attributeName) {
@@ -47,13 +47,16 @@ case class InlineInfoAttribute(inlineInfo: InlineInfo) extends Attribute(InlineI
 
     result.putByte(InlineInfoAttribute.VERSION)
 
-    var hasSelfIsFinal = 0
-    if (inlineInfo.isEffectivelyFinal)               hasSelfIsFinal |= 1
-    if (inlineInfo.traitImplClassSelfType.isDefined) hasSelfIsFinal |= 2
-    result.putByte(hasSelfIsFinal)
+    var flags = 0
+    if (inlineInfo.isEffectivelyFinal)      flags |= 1
+    //                                      flags |= 2 // no longer written
+    if (inlineInfo.sam.isDefined)           flags |= 4
+    result.putByte(flags)
 
-    for (selfInternalName <- inlineInfo.traitImplClassSelfType) {
-      result.putShort(cw.newUTF8(selfInternalName))
+    for (samNameDesc <- inlineInfo.sam) {
+      val (name, desc) = samNameDesc.span(_ != '(')
+      result.putShort(cw.newUTF8(name))
+      result.putShort(cw.newUTF8(desc))
     }
 
     // The method count fits in a short (the methods_count in a classfile is also a short)
@@ -68,10 +71,10 @@ case class InlineInfoAttribute(inlineInfo: InlineInfo) extends Attribute(InlineI
       result.putShort(cw.newUTF8(desc))
 
       var inlineInfo = 0
-      if (info.effectivelyFinal)                    inlineInfo |= 1
-      if (info.traitMethodWithStaticImplementation) inlineInfo |= 2
-      if (info.annotatedInline)                     inlineInfo |= 4
-      if (info.annotatedNoInline)                   inlineInfo |= 8
+      if (info.effectivelyFinal)  inlineInfo |= 1
+      //                          inlineInfo |= 2 // no longer written
+      if (info.annotatedInline)   inlineInfo |= 4
+      if (info.annotatedNoInline) inlineInfo |= 8
       result.putByte(inlineInfo)
     }
 
@@ -79,7 +82,7 @@ case class InlineInfoAttribute(inlineInfo: InlineInfo) extends Attribute(InlineI
   }
 
   /**
-   * De-serialize the attribute into an InlineInfo. The attribute starts at cr.b(off), but we don't
+   * Deserialize the attribute into an InlineInfo. The attribute starts at cr.b(off), but we don't
    * need to access that array directly, we can use the `read` methods provided by the ClassReader.
    *
    * `buf` is a pre-allocated character array that is guaranteed to be long enough to hold any
@@ -94,15 +97,17 @@ case class InlineInfoAttribute(inlineInfo: InlineInfo) extends Attribute(InlineI
 
     val version = nextByte()
     if (version == 1) {
-      val hasSelfIsFinal = nextByte()
-      val isFinal = (hasSelfIsFinal & 1) != 0
-      val hasSelf = (hasSelfIsFinal & 2) != 0
-
-      val self = if (hasSelf) {
-        val selfName = nextUTF8()
-        Some(selfName)
-      } else {
-        None
+      val flags = nextByte()
+      val isFinal           = (flags & 1) != 0
+      val hasSelf           = (flags & 2) != 0
+      val hasSam            = (flags & 4) != 0
+
+      if (hasSelf) nextUTF8() // no longer used
+
+      val sam = if (!hasSam) None else {
+        val name = nextUTF8()
+        val desc = nextUTF8()
+        Some(name + desc)
       }
 
       val numEntries = nextShort()
@@ -111,14 +116,15 @@ case class InlineInfoAttribute(inlineInfo: InlineInfo) extends Attribute(InlineI
         val desc = nextUTF8()
 
         val inlineInfo = nextByte()
-        val isFinal                             = (inlineInfo & 1) != 0
-        val traitMethodWithStaticImplementation = (inlineInfo & 2) != 0
-        val isInline                            = (inlineInfo & 4) != 0
-        val isNoInline                          = (inlineInfo & 8) != 0
-        (name + desc, MethodInlineInfo(isFinal, traitMethodWithStaticImplementation, isInline, isNoInline))
+        val isFinal    = (inlineInfo & 1) != 0
+        //             = (inlineInfo & 2) != 0 // no longer used
+        val isInline   = (inlineInfo & 4) != 0
+        val isNoInline = (inlineInfo & 8) != 0
+        (name + desc, MethodInlineInfo(isFinal, isInline, isNoInline))
       }).toMap
 
-      InlineInfoAttribute(InlineInfo(self, isFinal, infos, None))
+      val info = InlineInfo(isFinal, sam, infos, None)
+      InlineInfoAttribute(info)
     } else {
       val msg = UnknownScalaInlineInfoVersion(cr.getClassName, version)
       InlineInfoAttribute(BTypes.EmptyInlineInfo.copy(warning = Some(msg)))
@@ -128,9 +134,18 @@ case class InlineInfoAttribute(inlineInfo: InlineInfo) extends Attribute(InlineI
 
 object InlineInfoAttribute {
   /**
+   * Notes:
+   *  - `traitImplClassSelfType` is no longer emitted, `hasTraitImplClassSelfType` is always emitted
+   *    as 0. Similarly, `traitMethodWithStaticImplementation` is always emitted 0.
+   *  - When reading an existing attribute where `hasTraitImplClassSelfType` is 1, the
+   *    `traitImplClassSelfType` is ignored. Also the value of `traitMethodWithStaticImplementation`
+   *    is ignored.
+   *
    * [u1]    version
-   * [u1]    isEffectivelyFinal (<< 0), hasTraitImplClassSelfType (<< 1)
+   * [u1]    isEffectivelyFinal (<< 0), hasTraitImplClassSelfType (<< 1), hasSam (<< 2), hasLateInterfaces (<< 3)
    * [u2]?   traitImplClassSelfType (reference)
+   * [u2]?   samName (reference)
+   * [u2]?   samDescriptor (reference)
    * [u2]    numMethodEntries
    *   [u2]  name (reference)
    *   [u2]  descriptor (reference)
@@ -142,7 +157,7 @@ object InlineInfoAttribute {
 }
 
 /**
- * In order to instruct the ASM framework to de-serialize the ScalaInlineInfo attribute, we need
+ * In order to instruct the ASM framework to deserialize the ScalaInlineInfo attribute, we need
  * to pass a prototype instance when running the class reader.
  */
-object InlineInfoAttributePrototype extends InlineInfoAttribute(InlineInfo(null, false, null, null))
+object InlineInfoAttributePrototype extends InlineInfoAttribute(InlineInfo(false, null, null, null))
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala
index 6b2786c1a3..1c29859f46 100644
--- a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala
+++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala
@@ -9,59 +9,125 @@ package opt
 
 import scala.annotation.tailrec
 import scala.tools.asm
-import asm.Handle
 import asm.Opcodes._
 import asm.tree._
-import scala.collection.convert.decorateAsScala._
-import scala.collection.convert.decorateAsJava._
+import scala.collection.JavaConverters._
 import AsmUtils._
 import BytecodeUtils._
 import collection.mutable
-import scala.tools.asm.tree.analysis.SourceInterpreter
 import BackendReporting._
 import scala.tools.nsc.backend.jvm.BTypes.InternalName
 
 class Inliner[BT <: BTypes](val btypes: BT) {
   import btypes._
   import callGraph._
+  import inlinerHeuristics._
+  import backendUtils._
 
-  def eliminateUnreachableCodeAndUpdateCallGraph(methodNode: MethodNode, definingClass: InternalName): Unit = {
-    localOpt.minimalRemoveUnreachableCode(methodNode, definingClass) foreach {
-      case invocation: MethodInsnNode  => callGraph.callsites.remove(invocation)
-      case indy: InvokeDynamicInsnNode => callGraph.closureInstantiations.remove(indy)
-      case _ =>
+  /** An entry of the inliner log. Every entry refers to the inline request it was created for. */
+  sealed trait InlineLog {
+    def request: InlineRequest
+  }
+  /**
+   * Logged for a request that was inlined.
+   *
+   * @param sizeBefore  instruction count of the callsite method before inlining
+   * @param sizeInlined instruction count of the callee method
+   */
+  final case class InlineLogSuccess(request: InlineRequest, sizeBefore: Int, sizeInlined: Int) extends InlineLog {
+    // entries logged while this entry is the current `upstream` of the `InlineLog` object;
+    // not part of the case class equality
+    var downstreamLog: mutable.Buffer[InlineLog] = mutable.ListBuffer.empty
+  }
+  /** Logged for a request that failed; `warning` carries the reason and is printed with the entry. */
+  final case class InlineLogFail(request: InlineRequest, warning: CannotInlineWarning) extends InlineLog
+  /** Logged as "rolling back, nested inline failed." (presumably `warnings` are those of the nested requests -- TODO confirm). */
+  final case class InlineLogRollback(request: InlineRequest, warnings: List[CannotInlineWarning]) extends InlineLog
+
+  object InlineLog {
+    /**
+     * Whether log entries for `request` are recorded. Logging has to be enabled by the user, and
+     * the request is either nested in a logged request (`upstream` is set) or it is a top-level
+     * request whose callsite method name (`internalClassName.methodName`) starts with the prefix
+     * given in the setting. The setting value `_` matches every method.
+     */
+    private def shouldLog(request: InlineRequest): Boolean = {
+      def userEnabledLogging = compilerSettings.YoptLogInline.isSetByUser
+      def callsiteMatchesPrefix = {
+        val configured = compilerSettings.YoptLogInline.value
+        val prefix = if (configured == "_") "" else configured
+        val site = request.callsite
+        val qualifiedName: String = site.callsiteClass.internalName + "." + site.callsiteMethod.name
+        qualifiedName.startsWith(prefix)
+      }
+      def nestedInLoggedRequest = upstream != null
+      userEnabledLogging && (nestedInLoggedRequest || (isTopLevel && callsiteMatchesPrefix))
+    }
+
+    // indexed by callsite method
+    private val logs = mutable.Map.empty[MethodNode, mutable.LinkedHashSet[InlineLog]]
+
+    private var upstream: InlineLogSuccess = _
+    private var isTopLevel = true
+
+    /**
+     * Runs `inlineRequest` followed by `inlinePost`, recording an [[InlineLogSuccess]] for
+     * `request` if it should be logged.
+     *
+     * @param request       the request being inlined
+     * @param inlineRequest by-name action, evaluated once, before `inlinePost`
+     * @param inlinePost    by-name action, evaluated once with `isTopLevel` set to false
+     * @return the result of `inlinePost`
+     */
+    def withInlineLogging[T](request: InlineRequest)(inlineRequest: => Unit)(inlinePost: => T): T = {
+      // evaluates `inlinePost` with `isTopLevel` cleared, restoring the previous value afterwards
+      def doInlinePost(): T = {
+        val savedIsTopLevel = isTopLevel
+        isTopLevel = false
+        try inlinePost
+        finally isTopLevel = savedIsTopLevel
+      }
+      if (shouldLog(request)) {
+        // the size has to be read before `inlineRequest` runs
+        val sizeBefore = request.callsite.callsiteMethod.instructions.size
+        inlineRequest
+        val log = InlineLogSuccess(request, sizeBefore, request.callsite.callee.get.callee.instructions.size)
+        apply(log)
+
+        // entries logged during `inlinePost` are attached to `log` as downstream entries
+        val savedUpstream = upstream
+        upstream = log
+        try doInlinePost()
+        finally upstream = savedUpstream
+      } else {
+        inlineRequest
+        doInlinePost()
+      }
+    }
+
+    /**
+     * Records a log entry if the entry's request should be logged. The entry is attached to the
+     * current `upstream` entry if there is one, otherwise it is added to the logs of the callsite
+     * method.
+     *
+     * @param log the entry to record; the by-name argument is evaluated exactly once
+     */
+    def apply(log: => InlineLog): Unit = {
+      // Force the by-name argument a single time: every further reference would re-evaluate the
+      // argument expression, so the entry that is checked could differ from the entry that is stored.
+      val entry = log
+      if (shouldLog(entry.request)) {
+        if (upstream != null) upstream.downstreamLog += entry
+        else {
+          val methodLogs = logs.getOrElseUpdate(entry.request.callsite.callsiteMethod, mutable.LinkedHashSet.empty)
+          methodLogs += entry
+        }
+      }
+    }
+
+    /**
+     * Renders a log entry as text, one line per entry. The downstream entries of a successful
+     * inline follow on separate lines, indented by two more spaces per nesting level.
+     *
+     * @param log    the entry to render
+     * @param indent number of spaces to put in front of the entry
+     */
+    def entryString(log: InlineLog, indent: Int = 0): String = {
+      val calleeInfo = log.request.callsite.callee.get
+      val target = calleeInfo.calleeDeclarationClass.internalName + "." + calleeInfo.callee.name
+      val pad = " " * indent
+      log match {
+        case success: InlineLogSuccess =>
+          val header = s"${pad}inlined $target. Before: ${success.sizeBefore} ins, inlined: ${success.sizeInlined} ins."
+          val nested = success.downstreamLog.map(entryString(_, indent + 2))
+          // without nested entries this is just the header line
+          (header +: nested).mkString("\n")
+
+        case failure: InlineLogFail =>
+          // keep the entry on a single line
+          s"${pad}failed $target. ${failure.warning.toString.replace('\n', ' ')}"
+
+        case _: InlineLogRollback =>
+          s"${pad}rolling back, nested inline failed."
+      }
+    }
+
+    /**
+     * Prints all recorded top-level log entries to standard output if inline logging was enabled
+     * by the user. Output is ordered by callsite class name, then by callsite method name; the
+     * entries of a method are printed in the order in which they were recorded.
+     */
+    def print(): Unit = if (compilerSettings.YoptLogInline.isSetByUser) {
+      // the class of a method's logs is taken from the first entry recorded for that method
+      val logsByClass = logs.groupBy { case (_, entries) => entries.head.request.callsite.callsiteClass.internalName }
+      val classesInOrder: List[(InternalName, mutable.Map[MethodNode, mutable.LinkedHashSet[InlineLog]])] =
+        logsByClass.toList.sortBy { case (className, _) => className }
+      classesInOrder foreach { case (className, perMethod) =>
+        perMethod.toList.sortBy { case (method, _) => method.name } foreach { case (method, entries) =>
+          entries foreach { entry => // insertion order
+            println(s"Inline into $className.${method.name}: ${entryString(entry)}")
+          }
+        }
+      }
     }
   }
 
   def runInliner(): Unit = {
-    rewriteFinalTraitMethodInvocations()
-
     for (request <- collectAndOrderInlineRequests) {
-      val Right(callee) = request.callee // collectAndOrderInlineRequests returns callsites with a known callee
-
-      // Inlining a method can create unreachable code. Example:
-      //   def f = throw e
-      //   def g = f; println() // println is unreachable after inlining f
-      // If we have an inline request for a call to g, and f has been already inlined into g, we
-      // need to run DCE before inlining g.
-      eliminateUnreachableCodeAndUpdateCallGraph(callee.callee, callee.calleeDeclarationClass.internalName)
-
-      // DCE above removes unreachable callsites from the call graph. If the inlining request denotes
-      // such an eliminated callsite, do nothing.
-      if (callGraph.callsites contains request.callsiteInstruction) {
-        val r = inline(request.callsiteInstruction, request.callsiteStackHeight, request.callsiteMethod, request.callsiteClass,
-          callee.callee, callee.calleeDeclarationClass,
-          request.receiverKnownNotNull, keepLineNumbers = false)
-
-        for (warning <- r) {
-          if ((callee.annotatedInline && btypes.compilerSettings.YoptWarningEmitAtInlineFailed) || warning.emitWarning(compilerSettings)) {
-            val annotWarn = if (callee.annotatedInline) " is annotated @inline but" else ""
-            val msg = s"${BackendReporting.methodSignature(callee.calleeDeclarationClass.internalName, callee.callee)}$annotWarn could not be inlined:\n$warning"
-            backendReporting.inlinerWarning(request.callsitePosition, msg)
-          }
-        }
+      val Right(callee) = request.callsite.callee // collectAndOrderInlineRequests returns callsites with a known callee
+      val warnings = inline(request)
+      for (warning <- warnings) {
+        if (warning.emitWarning(compilerSettings))
+          backendReporting.inlinerWarning(request.callsite.callsitePosition, warning.toString)
       }
     }
+    InlineLog.print()
   }
 
   /**
@@ -69,165 +135,21 @@ class Inliner[BT <: BTypes](val btypes: BT) {
    *   - Always remove the same request when breaking inlining cycles
    *   - Perform inlinings in a consistent order
    */
-  object callsiteOrdering extends Ordering[Callsite] {
-    override def compare(x: Callsite, y: Callsite): Int = {
-      val cls = x.callsiteClass.internalName compareTo y.callsiteClass.internalName
+  object callsiteOrdering extends Ordering[InlineRequest] {
+    override def compare(x: InlineRequest, y: InlineRequest): Int = {
+      val xCs = x.callsite
+      val yCs = y.callsite
+      val cls = xCs.callsiteClass.internalName compareTo yCs.callsiteClass.internalName
       if (cls != 0) return cls
 
-      val name = x.callsiteMethod.name compareTo y.callsiteMethod.name
+      val name = xCs.callsiteMethod.name compareTo yCs.callsiteMethod.name
       if (name != 0) return name
 
-      val desc = x.callsiteMethod.desc compareTo y.callsiteMethod.desc
+      val desc = xCs.callsiteMethod.desc compareTo yCs.callsiteMethod.desc
       if (desc != 0) return desc
 
       def pos(c: Callsite) = c.callsiteMethod.instructions.indexOf(c.callsiteInstruction)
-      pos(x) - pos(y)
-    }
-  }
-
-  /**
-   * Select callsites from the call graph that should be inlined. The resulting list of inlining
-   * requests is allowed to have cycles, and the callsites can appear in any order.
-   */
-  def selectCallsitesForInlining: List[Callsite] = {
-    callsites.valuesIterator.filter({
-      case callsite @ Callsite(_, _, _, Right(Callee(callee, calleeDeclClass, safeToInline, _, annotatedInline, _, warning)), _, _, _, pos) =>
-        val res = doInlineCallsite(callsite)
-
-        if (!res) {
-          if (annotatedInline && btypes.compilerSettings.YoptWarningEmitAtInlineFailed) {
-            // if the callsite is annotated @inline, we report an inline warning even if the underlying
-            // reason is, for example, mixed compilation (which has a separate -Yopt-warning flag).
-            def initMsg = s"${BackendReporting.methodSignature(calleeDeclClass.internalName, callee)} is annotated @inline but cannot be inlined"
-            def warnMsg = warning.map(" Possible reason:\n" + _).getOrElse("")
-            if (doRewriteTraitCallsite(callsite))
-              backendReporting.inlinerWarning(pos, s"$initMsg: the trait method call could not be rewritten to the static implementation method." + warnMsg)
-            else if (!safeToInline)
-              backendReporting.inlinerWarning(pos, s"$initMsg: the method is not final and may be overridden." + warnMsg)
-            else
-              backendReporting.inlinerWarning(pos, s"$initMsg." + warnMsg)
-          } else if (warning.isDefined && warning.get.emitWarning(compilerSettings)) {
-            // when annotatedInline is false, and there is some warning, the callsite metadata is possibly incomplete.
-            backendReporting.inlinerWarning(pos, s"there was a problem determining if method ${callee.name} can be inlined: \n"+ warning.get)
-          }
-        }
-
-        res
-
-      case Callsite(ins, _, _, Left(warning), _, _, _, pos) =>
-        if (warning.emitWarning(compilerSettings))
-          backendReporting.inlinerWarning(pos, s"failed to determine if ${ins.name} should be inlined:\n$warning")
-        false
-    }).toList
-  }
-
-  /**
-   * The current inlining heuristics are simple: inline calls to methods annotated @inline.
-   */
-  def doInlineCallsite(callsite: Callsite): Boolean = callsite match {
-    case Callsite(_, _, _, Right(Callee(callee, calleeDeclClass, safeToInline, _, annotatedInline, _, warning)), _, _, _, pos) =>
-      if (compilerSettings.YoptInlineHeuristics.value == "everything") safeToInline
-      else annotatedInline && safeToInline
-
-    case _ => false
-  }
-
-  def rewriteFinalTraitMethodInvocations(): Unit = {
-    // Rewriting final trait method callsites to the implementation class enables inlining.
-    // We cannot just iterate over the values of the `callsites` map because the rewrite changes the
-    // map. Therefore we first copy the values to a list.
-    callsites.values.toList.foreach(rewriteFinalTraitMethodInvocation)
-  }
-
-  /**
-   * True for statically resolved trait callsites that should be rewritten to the static implementation method.
-   */
-  def doRewriteTraitCallsite(callsite: Callsite) = callsite.callee match {
-    case Right(Callee(callee, calleeDeclarationClass, safeToInline, true, annotatedInline, annotatedNoInline, infoWarning)) => true
-    case _ => false
-  }
-
-  /**
-   * Rewrite the INVOKEINTERFACE callsite of a final trait method invocation to INVOKESTATIC of the
-   * corresponding method in the implementation class. This enables inlining final trait methods.
-   *
-   * In a final trait method callsite, the callee is safeToInline and the callee method is abstract
-   * (the receiver type is the interface, so the method is abstract).
-   */
-  def rewriteFinalTraitMethodInvocation(callsite: Callsite): Unit = {
-    if (doRewriteTraitCallsite(callsite)) {
-      val Right(Callee(callee, calleeDeclarationClass, _, _, annotatedInline, annotatedNoInline, infoWarning)) = callsite.callee
-
-      val traitMethodArgumentTypes = asm.Type.getArgumentTypes(callee.desc)
-
-      val implClassInternalName = calleeDeclarationClass.internalName + "$class"
-
-      val selfParamTypeV: Either[OptimizerWarning, ClassBType] = calleeDeclarationClass.info.map(_.inlineInfo.traitImplClassSelfType match {
-        case Some(internalName) => classBTypeFromParsedClassfile(internalName)
-        case None               => calleeDeclarationClass
-      })
-
-      def implClassMethodV(implMethodDescriptor: String): Either[OptimizerWarning, MethodNode] = {
-        byteCodeRepository.methodNode(implClassInternalName, callee.name, implMethodDescriptor).map(_._1)
-      }
-
-      // The rewrite reading the implementation class and the implementation method from the bytecode
-      // repository. If either of the two fails, the rewrite is not performed.
-      val res = for {
-        selfParamType        <- selfParamTypeV
-        implMethodDescriptor =  asm.Type.getMethodDescriptor(asm.Type.getReturnType(callee.desc), selfParamType.toASMType +: traitMethodArgumentTypes: _*)
-        implClassMethod      <- implClassMethodV(implMethodDescriptor)
-        implClassBType       =  classBTypeFromParsedClassfile(implClassInternalName)
-        selfTypeOk           <- calleeDeclarationClass.isSubtypeOf(selfParamType)
-      } yield {
-
-        // The self parameter type may be incompatible with the trait type.
-        //   trait T { self: S => def foo = 1 }
-        // The $self parameter type of T$class.foo is S, which may be unrelated to T. If we re-write
-        // a call to T.foo to T$class.foo, we need to cast the receiver to S, otherwise we get a
-        // VerifyError. We run a `SourceInterpreter` to find all producer instructions of the
-        // receiver value and add a cast to the self type after each.
-        if (!selfTypeOk) {
-          // there's no need to run eliminateUnreachableCode here. building the call graph does that
-          // already, no code can become unreachable in the meantime.
-          val analyzer = new AsmAnalyzer(callsite.callsiteMethod, callsite.callsiteClass.internalName, new SourceInterpreter)
-          val receiverValue = analyzer.frameAt(callsite.callsiteInstruction).peekStack(traitMethodArgumentTypes.length)
-          for (i <- receiverValue.insns.asScala) {
-            val cast = new TypeInsnNode(CHECKCAST, selfParamType.internalName)
-            callsite.callsiteMethod.instructions.insert(i, cast)
-          }
-        }
-
-        val newCallsiteInstruction = new MethodInsnNode(INVOKESTATIC, implClassInternalName, callee.name, implMethodDescriptor, false)
-        callsite.callsiteMethod.instructions.insert(callsite.callsiteInstruction, newCallsiteInstruction)
-        callsite.callsiteMethod.instructions.remove(callsite.callsiteInstruction)
-
-        callGraph.callsites.remove(callsite.callsiteInstruction)
-        val staticCallsite = Callsite(
-          callsiteInstruction = newCallsiteInstruction,
-          callsiteMethod      = callsite.callsiteMethod,
-          callsiteClass       = callsite.callsiteClass,
-          callee              = Right(Callee(
-            callee                 = implClassMethod,
-            calleeDeclarationClass = implClassBType,
-            safeToInline           = true,
-            safeToRewrite          = false,
-            annotatedInline        = annotatedInline,
-            annotatedNoInline      = annotatedNoInline,
-            calleeInfoWarning      = infoWarning)),
-          argInfos            = Nil,
-          callsiteStackHeight = callsite.callsiteStackHeight,
-          receiverKnownNotNull = callsite.receiverKnownNotNull,
-          callsitePosition = callsite.callsitePosition
-        )
-        callGraph.callsites(newCallsiteInstruction) = staticCallsite
-      }
-
-      for (warning <- res.left) {
-        val Right(callee) = callsite.callee
-        val newCallee = callee.copy(calleeInfoWarning = Some(RewriteTraitCallToStaticImplMethodFailed(calleeDeclarationClass.internalName, callee.callee.name, callee.callee.desc, warning)))
-        callGraph.callsites(callsite.callsiteInstruction) = callsite.copy(callee = Right(newCallee))
-      }
+      pos(xCs) - pos(yCs)
     }
   }
 
@@ -238,15 +160,13 @@ class Inliner[BT <: BTypes](val btypes: BT) {
    * The resulting list is sorted such that the leaves of the inline request graph are on the left.
    * Once these leaves are inlined, the successive elements will be leaves, etc.
    */
-  private def collectAndOrderInlineRequests: List[Callsite] = {
-    val requests = selectCallsitesForInlining
+  private def collectAndOrderInlineRequests: List[InlineRequest] = {
+    val requestsByMethod = selectCallsitesForInlining withDefaultValue Set.empty
+
+    val elided = mutable.Set.empty[InlineRequest]
+    def nonElidedRequests(methodNode: MethodNode): Set[InlineRequest] = requestsByMethod(methodNode) diff elided
 
-    // This map is an index to look up the inlining requests for a method. The value sets are mutable
-    // to allow removing elided requests (to break inlining cycles). The map itself is mutable to
-    // allow efficient building: requests.groupBy would build values as List[Callsite] that need to
-    // be transformed to mutable sets.
-    val inlineRequestsForMethod: mutable.Map[MethodNode, mutable.Set[Callsite]] = mutable.HashMap.empty.withDefaultValue(mutable.HashSet.empty)
-    for (r <- requests) inlineRequestsForMethod.getOrElseUpdate(r.callsiteMethod, mutable.HashSet.empty) += r
+    def allCallees(r: InlineRequest): Set[MethodNode] = r.post.flatMap(allCallees).toSet + r.callsite.callee.get.callee
 
     /**
      * Break cycles in the inline request graph by removing callsites.
@@ -254,236 +174,454 @@ class Inliner[BT <: BTypes](val btypes: BT) {
      * The list `requests` is traversed left-to-right, removing those callsites that are part of a
      * cycle. Elided callsites are also removed from the `inlineRequestsForMethod` map.
      */
-    def breakInlineCycles(requests: List[Callsite]): List[Callsite] = {
+    def breakInlineCycles: List[InlineRequest] = {
       // is there a path of inline requests from start to goal?
-      def isReachable(start: MethodNode, goal: MethodNode): Boolean = {
-        @tailrec def reachableImpl(check: List[MethodNode], visited: Set[MethodNode]): Boolean = check match {
-          case x :: xs =>
+      def isReachable(start: Set[MethodNode], goal: MethodNode): Boolean = {
+        @tailrec def reachableImpl(check: Set[MethodNode], visited: Set[MethodNode]): Boolean = {
+          if (check.isEmpty) false
+          else {
+            val x = check.head
             if (x == goal) true
-            else if (visited(x)) reachableImpl(xs, visited)
+            else if (visited(x)) reachableImpl(check - x, visited)
             else {
-              val callees = inlineRequestsForMethod(x).map(_.callee.get.callee)
-              reachableImpl(xs ::: callees.toList, visited + x)
+              val callees = nonElidedRequests(x).flatMap(allCallees)
+              reachableImpl(check - x ++ callees, visited + x)
             }
-
-          case Nil =>
-            false
+          }
         }
-        reachableImpl(List(start), Set.empty)
+        reachableImpl(start, Set.empty)
       }
 
-      val result = new mutable.ListBuffer[Callsite]()
+      val result = new mutable.ListBuffer[InlineRequest]()
+      val requests = requestsByMethod.valuesIterator.flatten.toArray
       // sort the inline requests to ensure that removing requests is deterministic
-      for (r <- requests.sorted(callsiteOrdering)) {
+      java.util.Arrays.sort(requests, callsiteOrdering)
+      for (r <- requests) {
         // is there a chain of inlining requests that would inline the callsite method into the callee?
-        if (isReachable(r.callee.get.callee, r.callsiteMethod))
-          inlineRequestsForMethod(r.callsiteMethod) -= r
+        if (isReachable(allCallees(r), r.callsite.callsiteMethod))
+          elided += r
         else
           result += r
+        ()
       }
       result.toList
     }
 
     // sort the remaining inline requests such that the leaves appear first, then those requests
     // that become leaves, etc.
-    def leavesFirst(requests: List[Callsite], visited: Set[Callsite] = Set.empty): List[Callsite] = {
+    def leavesFirst(requests: List[InlineRequest], visited: Set[InlineRequest] = Set.empty): List[InlineRequest] = {
       if (requests.isEmpty) Nil
       else {
         val (leaves, others) = requests.partition(r => {
-          val inlineRequestsForCallee = inlineRequestsForMethod(r.callee.get.callee)
-          inlineRequestsForCallee.forall(visited)
+          val inlineRequestsForCallees = allCallees(r).flatMap(nonElidedRequests)
+          inlineRequestsForCallees.forall(visited)
         })
         assert(leaves.nonEmpty, requests)
         leaves ::: leavesFirst(others, visited ++ leaves)
       }
     }
 
-    leavesFirst(breakInlineCycles(requests))
+    leavesFirst(breakInlineCycles)
   }
 
-
   /**
-   * Copy and adapt the instructions of a method to a callsite.
+   * Given an InlineRequest(mainCallsite, post = List(postCallsite)), the postCallsite is a callsite
+   * in the method `mainCallsite.callee`. Once the mainCallsite is inlined into the target method
+   * (mainCallsite.callsiteMethod), we need to find the cloned callsite that corresponds to the
+   * postCallsite so we can inline that into the target method as well.
    *
-   * Preconditions:
-   *   - The maxLocals and maxStack values of the callsite method are correctly computed
-   *   - The callsite method contains no unreachable basic blocks, i.e., running an [[Analyzer]]
-   *     does not produce any `null` frames
+   * However, it is possible that there is no cloned callsite at all that corresponds to the
+   * postCallsite, for example if the corresponding callsite was already inlined. Example:
+   *
+   *   def a() = 1
+   *   def b() = a() + 2
+   *   def c() = b() + 3
+   *   def d() = c() + 4
+   *
+   * We have the following callsite objects in the call graph:
+   *
+   *   c1 = a() in b
+   *   c2 = b() in c
+   *   c3 = c() in d
    *
-   * @param callsiteInstruction     The invocation instruction
-   * @param callsiteStackHeight     The stack height at the callsite
-   * @param callsiteMethod          The method in which the invocation occurs
-   * @param callsiteClass           The class in which the callsite method is defined
-   * @param callee                  The invoked method
-   * @param calleeDeclarationClass  The class in which the invoked method is defined
-   * @param receiverKnownNotNull    `true` if the receiver is known to be non-null
-   * @param keepLineNumbers         `true` if LineNumberNodes should be copied to the call site
-   * @return                        `Some(message)` if inlining cannot be performed, `None` otherwise
+   * Assume we have the following inline request
+   *   r = InlineRequest(c3,
+   *         post = List(InlineRequest(c2,
+   *           post = List(InlineRequest(c1, post = Nil)))))
+   *
+   * But before inlining r, assume a separate InlineRequest(c2, post = Nil) is inlined first. We get
+   *
+   *   c1' = a() in c                  // added to the call graph
+   *   c1.inlinedClones += (c1' at c2) // remember that c1' was created when inlining c2
+   *   ~c2~                            // c2 is removed from the call graph
+   *
+   * If we now inline r, we first inline c3. We get
+   *
+   *   c1'' = a() in d                   // added to call graph
+   *   c1'.inlinedClones += (c1'' at c3) // remember that c1'' was created when inlining c3
+   *   ~c3~
+   *
+   * Now we continue with the post-requests for r, i.e. c2.
+   *   - we try to find the clone of c2 that was created when inlining c3 - but there is none. c2
+   *     was already inlined before
+   *   - we continue with the post-request of c2: c1
+   *     - we search for the callsite of c1 that was cloned when inlining c2, we find c1'
+   *     - recursively we search for the callsite of c1' that was cloned when inlining c3, we find c1''
+   *     - so we create an inline request for c1''
    */
-  def inline(callsiteInstruction: MethodInsnNode, callsiteStackHeight: Int, callsiteMethod: MethodNode, callsiteClass: ClassBType,
-             callee: MethodNode, calleeDeclarationClass: ClassBType,
-             receiverKnownNotNull: Boolean, keepLineNumbers: Boolean): Option[CannotInlineWarning] = {
-    canInline(callsiteInstruction, callsiteStackHeight, callsiteMethod, callsiteClass, callee, calleeDeclarationClass) orElse {
-      // New labels for the cloned instructions
-      val labelsMap = cloneLabels(callee)
-      val (clonedInstructions, instructionMap) = cloneInstructions(callee, labelsMap)
-      if (!keepLineNumbers) {
-        removeLineNumberNodes(clonedInstructions)
+  def adaptPostRequestForMainCallsite(post: InlineRequest, mainCallsite: Callsite): List[InlineRequest] = {
+    def impl(post: InlineRequest, at: Callsite): List[InlineRequest] = {
+      post.callsite.inlinedClones.find(_.clonedWhenInlining == at) match {
+        case Some(clonedCallsite) =>
+          List(InlineRequest(clonedCallsite.callsite, post.post, post.reason))
+        case None =>
+          post.post.flatMap(impl(_, post.callsite)).flatMap(impl(_, at))
       }
+    }
+    impl(post, mainCallsite)
+  }
+
+  class UndoLog(active: Boolean = true) {
+    import java.util.{ ArrayList => JArrayList }
+
+    private var actions = List.empty[() => Unit]
+    private var methodStateSaved = false
+
+    def apply(a: => Unit): Unit = if (active) actions = (() => a) :: actions
+    def rollback(): Unit = if (active) actions.foreach(_.apply())
 
-      // local vars in the callee are shifted by the number of locals at the callsite
-      val localVarShift = callsiteMethod.maxLocals
-      clonedInstructions.iterator.asScala foreach {
-        case varInstruction: VarInsnNode => varInstruction.`var` += localVarShift
-        case iinc: IincInsnNode          => iinc.`var` += localVarShift
-        case _ => ()
+    def saveMethodState(methodNode: MethodNode): Unit = if (active && !methodStateSaved) {
+      methodStateSaved = true
+      val currentInstructions = methodNode.instructions.toArray
+      val currentLocalVariables = new JArrayList(methodNode.localVariables)
+      val currentTryCatchBlocks = new JArrayList(methodNode.tryCatchBlocks)
+      val currentMaxLocals = methodNode.maxLocals
+      val currentMaxStack = methodNode.maxStack
+
+      apply {
+        // `methodNode.instructions.clear()` doesn't work: it keeps the `prev` / `next` / `index` of
+        // instruction nodes. `instructions.removeAll(true)` would work, but is not public.
+        methodNode.instructions.iterator.asScala.toList.foreach(methodNode.instructions.remove)
+        for (i <- currentInstructions) methodNode.instructions.add(i)
+
+        methodNode.localVariables.clear()
+        methodNode.localVariables.addAll(currentLocalVariables)
+
+        methodNode.tryCatchBlocks.clear()
+        methodNode.tryCatchBlocks.addAll(currentTryCatchBlocks)
+
+        methodNode.maxLocals = currentMaxLocals
+        methodNode.maxStack = currentMaxStack
       }
+    }
+  }
 
-      // add a STORE instruction for each expected argument, including for THIS instance if any
-      val argStores = new InsnList
-      var nextLocalIndex = callsiteMethod.maxLocals
-      if (!isStaticMethod(callee)) {
-        if (!receiverKnownNotNull) {
-          argStores.add(new InsnNode(DUP))
-          val nonNullLabel = newLabelNode
-          argStores.add(new JumpInsnNode(IFNONNULL, nonNullLabel))
-          argStores.add(new InsnNode(ACONST_NULL))
-          argStores.add(new InsnNode(ATHROW))
-          argStores.add(nonNullLabel)
+  val NoUndoLogging = new UndoLog(active = false)
+
+  /**
+   * Inline the callsite of an inlining request and its post-inlining requests.
+   *
+   * @return An inliner warning for each callsite that could not be inlined.
+   */
+  def inline(request: InlineRequest, undo: UndoLog = NoUndoLogging): List[CannotInlineWarning] = {
+    def doInline(undo: UndoLog, callRollback: Boolean = false): List[CannotInlineWarning] = {
+      InlineLog.withInlineLogging(request) {
+        inlineCallsite(request.callsite, undo)
+      } {
+        val postRequests = request.post.flatMap(adaptPostRequestForMainCallsite(_, request.callsite))
+        val warnings = postRequests.flatMap(inline(_, undo))
+        if (callRollback && warnings.nonEmpty) {
+          undo.rollback()
+          InlineLog(InlineLogRollback(request, warnings))
         }
-        argStores.add(new VarInsnNode(ASTORE, nextLocalIndex))
-        nextLocalIndex += 1
+        warnings
       }
+    }
 
-      // We just use an asm.Type here, no need to create the MethodBType.
-      val calleAsmType = asm.Type.getMethodType(callee.desc)
+    def inlinedByPost(insns: List[AbstractInsnNode]): Boolean =
+      insns.nonEmpty && insns.forall(ins => request.post.exists(_.callsite.callsiteInstruction == ins))
 
-      for(argTp <- calleAsmType.getArgumentTypes) {
-        val opc = argTp.getOpcode(ISTORE) // returns the correct xSTORE instruction for argTp
-        argStores.insert(new VarInsnNode(opc, nextLocalIndex)) // "insert" is "prepend" - the last argument is on the top of the stack
-        nextLocalIndex += argTp.getSize
+    canInlineCallsite(request.callsite) match {
+      case None =>
+        doInline(undo)
+
+      case Some((_, illegalAccessInsns)) if inlinedByPost(illegalAccessInsns) =>
+        // speculatively inline, roll back if an illegalAccessInsn cannot be eliminated
+        if (undo == NoUndoLogging) doInline(new UndoLog(), callRollback = true)
+        else doInline(undo)
+
+      case Some((w, _)) =>
+        InlineLog(InlineLogFail(request, w))
+        List(w)
+    }
+  }
+
+  /**
+   * Copy and adapt the instructions of a method to a callsite.
+   *
+   * Preconditions:
+   *   - The callsite can safely be inlined (canInlineCallsite returns None)
+   *   - The maxLocals and maxStack values of the callsite method are correctly computed
+   *
+   * Returns nothing: the callsite method and the call graph are updated in place, and `undo`
+   * records how to revert them. Nothing is inlined if the callsite is no longer in the call graph.
+   */
+  def inlineCallsite(callsite: Callsite, undo: UndoLog = NoUndoLogging): Unit = {
+    import callsite.{callsiteClass, callsiteMethod, callsiteInstruction, receiverKnownNotNull, callsiteStackHeight}
+    val Right(callsiteCallee) = callsite.callee
+    import callsiteCallee.{callee, calleeDeclarationClass, sourceFilePath}
+
+    // Inlining requires the callee not to have unreachable code, the analyzer used below should not
+    // return any `null` frames. Note that inlining a method can create unreachable code. Example:
+    //   def f = throw e
+    //   def g = f; println() // println is unreachable after inlining f
+    // If we have an inline request for a call to g, and f has been already inlined into g, we
+    // need to run DCE on g's body before inlining g.
+    localOpt.minimalRemoveUnreachableCode(callee, calleeDeclarationClass.internalName)
+
+    // If the callsite was eliminated by DCE, do nothing.
+    if (!callGraph.containsCallsite(callsite)) return
+
+    // New labels for the cloned instructions
+    val labelsMap = cloneLabels(callee)
+    val sameSourceFile = sourceFilePath match {
+      case Some(calleeSource) => byteCodeRepository.compilingClasses.get(callsiteClass.internalName) match {
+        case Some((_, `calleeSource`)) => true
+        case _ => false
       }
+      case _ => false
+    }
+    val (clonedInstructions, instructionMap, targetHandles) = cloneInstructions(callee, labelsMap, keepLineNumbers = sameSourceFile)
+
+    // local vars in the callee are shifted by the number of locals at the callsite
+    val localVarShift = callsiteMethod.maxLocals
+    clonedInstructions.iterator.asScala foreach {
+      case varInstruction: VarInsnNode => varInstruction.`var` += localVarShift
+      case iinc: IincInsnNode          => iinc.`var` += localVarShift
+      case _ => ()
+    }
 
-      clonedInstructions.insert(argStores)
-
-      // label for the exit of the inlined functions. xRETURNs are replaced by GOTOs to this label.
-      val postCallLabel = newLabelNode
-      clonedInstructions.add(postCallLabel)
-
-      // replace xRETURNs:
-      //   - store the return value (if any)
-      //   - clear the stack of the inlined method (insert DROPs)
-      //   - load the return value
-      //   - GOTO postCallLabel
-
-      val returnType = calleAsmType.getReturnType
-      val hasReturnValue = returnType.getSort != asm.Type.VOID
-      val returnValueIndex = callsiteMethod.maxLocals + callee.maxLocals
-      nextLocalIndex += returnType.getSize
-
-      def returnValueStore(returnInstruction: AbstractInsnNode) = {
-        val opc = returnInstruction.getOpcode match {
-          case IRETURN => ISTORE
-          case LRETURN => LSTORE
-          case FRETURN => FSTORE
-          case DRETURN => DSTORE
-          case ARETURN => ASTORE
-        }
-        new VarInsnNode(opc, returnValueIndex)
+    // add a STORE instruction for each expected argument, including for THIS instance if any
+    val argStores = new InsnList
+    var nextLocalIndex = callsiteMethod.maxLocals
+    if (!isStaticMethod(callee)) {
+      if (!receiverKnownNotNull) {
+        argStores.add(new InsnNode(DUP))
+        val nonNullLabel = newLabelNode
+        argStores.add(new JumpInsnNode(IFNONNULL, nonNullLabel))
+        argStores.add(new InsnNode(ACONST_NULL))
+        argStores.add(new InsnNode(ATHROW))
+        argStores.add(nonNullLabel)
       }
+      argStores.add(new VarInsnNode(ASTORE, nextLocalIndex))
+      nextLocalIndex += 1
+    }
 
-      // We run an interpreter to know the stack height at each xRETURN instruction and the sizes
-      // of the values on the stack.
-      val analyzer = new AsmAnalyzer(callee, calleeDeclarationClass.internalName)
+    // We just use an asm.Type here, no need to create the MethodBType.
+    val calleAsmType = asm.Type.getMethodType(callee.desc)
+    val calleeParamTypes = calleAsmType.getArgumentTypes
 
-      for (originalReturn <- callee.instructions.iterator().asScala if isReturn(originalReturn)) {
-        val frame = analyzer.frameAt(originalReturn)
-        var stackHeight = frame.getStackSize
+    for(argTp <- calleeParamTypes) {
+      val opc = argTp.getOpcode(ISTORE) // returns the correct xSTORE instruction for argTp
+      argStores.insert(new VarInsnNode(opc, nextLocalIndex)) // "insert" is "prepend" - the last argument is on the top of the stack
+      nextLocalIndex += argTp.getSize
+    }
 
-        val inlinedReturn = instructionMap(originalReturn)
-        val returnReplacement = new InsnList
+    clonedInstructions.insert(argStores)
+
+    // label for the exit of the inlined functions. xRETURNs are replaced by GOTOs to this label.
+    val postCallLabel = newLabelNode
+    clonedInstructions.add(postCallLabel)
+
+    // replace xRETURNs:
+    //   - store the return value (if any)
+    //   - clear the stack of the inlined method (insert DROPs)
+    //   - load the return value
+    //   - GOTO postCallLabel
+
+    val returnType = calleAsmType.getReturnType
+    val hasReturnValue = returnType.getSort != asm.Type.VOID
+    val returnValueIndex = callsiteMethod.maxLocals + callee.maxLocals
+    nextLocalIndex += returnType.getSize
+
+    def returnValueStore(returnInstruction: AbstractInsnNode) = {
+      val opc = returnInstruction.getOpcode match {
+        case IRETURN => ISTORE
+        case LRETURN => LSTORE
+        case FRETURN => FSTORE
+        case DRETURN => DSTORE
+        case ARETURN => ASTORE
+      }
+      new VarInsnNode(opc, returnValueIndex)
+    }
 
-        def drop(slot: Int) = returnReplacement add getPop(frame.peekStack(slot).getSize)
+    // We run an interpreter to know the stack height at each xRETURN instruction and the sizes
+    // of the values on the stack.
+    // We don't need to worry about the method being too large for running an analysis. Callsites of
+    // large methods are not added to the call graph.
+    val analyzer = new AsmAnalyzer(callee, calleeDeclarationClass.internalName)
 
-        // for non-void methods, store the stack top into the return local variable
-        if (hasReturnValue) {
-          returnReplacement add returnValueStore(originalReturn)
-          stackHeight -= 1
-        }
+    for (originalReturn <- callee.instructions.iterator().asScala if isReturn(originalReturn)) {
+      val frame = analyzer.frameAt(originalReturn)
+      var stackHeight = frame.getStackSize
 
-        // drop the rest of the stack
-        for (i <- 0 until stackHeight) drop(i)
+      val inlinedReturn = instructionMap(originalReturn)
+      val returnReplacement = new InsnList
 
-        returnReplacement add new JumpInsnNode(GOTO, postCallLabel)
-        clonedInstructions.insert(inlinedReturn, returnReplacement)
-        clonedInstructions.remove(inlinedReturn)
-      }
+      def drop(slot: Int) = returnReplacement add getPop(frame.peekStack(slot).getSize)
 
-      // Load instruction for the return value
+      // for non-void methods, store the stack top into the return local variable
       if (hasReturnValue) {
-        val retVarLoad = {
-          val opc = returnType.getOpcode(ILOAD)
-          new VarInsnNode(opc, returnValueIndex)
-        }
-        clonedInstructions.insert(postCallLabel, retVarLoad)
+        returnReplacement add returnValueStore(originalReturn)
+        stackHeight -= 1
       }
 
-      callsiteMethod.instructions.insert(callsiteInstruction, clonedInstructions)
-      callsiteMethod.instructions.remove(callsiteInstruction)
-
-      callsiteMethod.localVariables.addAll(cloneLocalVariableNodes(callee, labelsMap, callee.name + "_").asJava)
-      callsiteMethod.tryCatchBlocks.addAll(cloneTryCatchBlockNodes(callee, labelsMap).asJava)
-
-      // Add all invocation instructions and closure instantiations that were inlined to the call graph
-      callee.instructions.iterator().asScala foreach {
-        case originalCallsiteIns: MethodInsnNode =>
-          callGraph.callsites.get(originalCallsiteIns) match {
-            case Some(originalCallsite) =>
-              val newCallsiteIns = instructionMap(originalCallsiteIns).asInstanceOf[MethodInsnNode]
-              callGraph.callsites(newCallsiteIns) = Callsite(
-                callsiteInstruction = newCallsiteIns,
-                callsiteMethod = callsiteMethod,
-                callsiteClass = callsiteClass,
-                callee = originalCallsite.callee,
-                argInfos = Nil, // TODO: re-compute argInfos for new destination (once we actually compute them)
-                callsiteStackHeight = callsiteStackHeight + originalCallsite.callsiteStackHeight,
-                receiverKnownNotNull = originalCallsite.receiverKnownNotNull,
-                callsitePosition = originalCallsite.callsitePosition
-              )
-
-            case None =>
-          }
+      // drop the rest of the stack
+      for (i <- 0 until stackHeight) drop(i)
 
-        case indy: InvokeDynamicInsnNode =>
-          callGraph.closureInstantiations.get(indy) match {
-            case Some(closureInit) =>
-              val newIndy = instructionMap(indy).asInstanceOf[InvokeDynamicInsnNode]
-              callGraph.closureInstantiations(newIndy) = ClosureInstantiation(closureInit.lambdaMetaFactoryCall.copy(indy = newIndy), callsiteMethod, callsiteClass)
-
-            case None =>
-          }
+      returnReplacement add new JumpInsnNode(GOTO, postCallLabel)
+      clonedInstructions.insert(inlinedReturn, returnReplacement)
+      clonedInstructions.remove(inlinedReturn)
+    }
 
-        case _ =>
+    // Load instruction for the return value
+    if (hasReturnValue) {
+      val retVarLoad = {
+        val opc = returnType.getOpcode(ILOAD)
+        new VarInsnNode(opc, returnValueIndex)
       }
-      // Remove the elided invocation from the call graph
-      callGraph.callsites.remove(callsiteInstruction)
+      clonedInstructions.insert(postCallLabel, retVarLoad)
+    }
 
-      // Inlining a method body can render some code unreachable, see example above (in runInliner).
-      unreachableCodeEliminated -= callsiteMethod
+    undo.saveMethodState(callsiteMethod)
 
-      callsiteMethod.maxLocals += returnType.getSize + callee.maxLocals
-      callsiteMethod.maxStack = math.max(callsiteMethod.maxStack, callee.maxStack + callsiteStackHeight)
+    callsiteMethod.instructions.insert(callsiteInstruction, clonedInstructions)
+    callsiteMethod.instructions.remove(callsiteInstruction)
 
-      None
+    callsiteMethod.localVariables.addAll(cloneLocalVariableNodes(callee, labelsMap, callee.name, localVarShift).asJava)
+    // prepend the handlers of the callee. the order of handlers matters: when an exception is thrown
+    // at some instruction, the first handler guarding that instruction and having a matching exception
+    // type is executed. prepending the callee's handlers makes sure to test those handlers first if
+    // an exception is thrown in the inlined code.
+    callsiteMethod.tryCatchBlocks.addAll(0, cloneTryCatchBlockNodes(callee, labelsMap).asJava)
+
+    callsiteMethod.maxLocals += returnType.getSize + callee.maxLocals
+    val maxStackOfInlinedCode = {
+      // One slot per value is correct for long / double, see comment in the `analysis` package object.
+      val numStoredArgs = calleeParamTypes.length + (if (isStaticMethod(callee)) 0 else 1)
+      callee.maxStack + callsiteStackHeight - numStoredArgs
+    }
+    val stackHeightAtNullCheck = {
+      // When adding a null check for the receiver, a DUP is inserted, which might cause a new maxStack.
+      // If the callsite has other argument values than the receiver on the stack, these are pop'ed
+      // and stored into locals before the null check, so in that case the maxStack doesn't grow.
+      val stackSlotForNullCheck = if (!isStaticMethod(callee) && !receiverKnownNotNull && calleeParamTypes.isEmpty) 1 else 0
+      callsiteStackHeight + stackSlotForNullCheck
     }
+
+    callsiteMethod.maxStack = math.max(callsiteMethod.maxStack, math.max(stackHeightAtNullCheck, maxStackOfInlinedCode))
+
+    val added = addIndyLambdaImplMethod(callsiteClass.internalName, targetHandles)
+    undo { removeIndyLambdaImplMethod(callsiteClass.internalName, added) }
+
+    callGraph.addIfMissing(callee, calleeDeclarationClass)
+
+    def mapArgInfo(argInfo: (Int, ArgInfo)): Option[(Int, ArgInfo)] = argInfo match {
+      case lit @ (_, FunctionLiteral)             => Some(lit)
+      case (argIndex, ForwardedParam(paramIndex)) => callsite.argInfos.get(paramIndex).map((argIndex, _))
+    }
+
+    // Add all invocation instructions and closure instantiations that were inlined to the call graph
+    callGraph.callsites(callee).valuesIterator foreach { originalCallsite =>
+      val newCallsiteIns = instructionMap(originalCallsite.callsiteInstruction).asInstanceOf[MethodInsnNode]
+      val argInfos = originalCallsite.argInfos flatMap mapArgInfo
+      val newCallsite = originalCallsite.copy(
+        callsiteInstruction = newCallsiteIns,
+        callsiteMethod = callsiteMethod,
+        callsiteClass = callsiteClass,
+        argInfos = argInfos,
+        callsiteStackHeight = callsiteStackHeight + originalCallsite.callsiteStackHeight
+      )
+      val clonedCallsite = ClonedCallsite(newCallsite, callsite)
+      originalCallsite.inlinedClones += clonedCallsite
+      callGraph.addCallsite(newCallsite)
+      undo {
+        originalCallsite.inlinedClones -= clonedCallsite
+        callGraph.removeCallsite(newCallsite.callsiteInstruction, newCallsite.callsiteMethod)
+      }
+    }
+
+    callGraph.closureInstantiations(callee).valuesIterator foreach { originalClosureInit =>
+      val newIndy = instructionMap(originalClosureInit.lambdaMetaFactoryCall.indy).asInstanceOf[InvokeDynamicInsnNode]
+      val capturedArgInfos = originalClosureInit.capturedArgInfos flatMap mapArgInfo
+      val newClosureInit = ClosureInstantiation(
+        originalClosureInit.lambdaMetaFactoryCall.copy(indy = newIndy),
+        callsiteMethod,
+        callsiteClass,
+        capturedArgInfos)
+      originalClosureInit.inlinedClones += newClosureInit
+      callGraph.addClosureInstantiation(newClosureInit)
+      undo {
+        callGraph.removeClosureInstantiation(newClosureInit.lambdaMetaFactoryCall.indy, newClosureInit.ownerMethod)
+      }
+    }
+
+    // Remove the elided invocation from the call graph
+    callGraph.removeCallsite(callsiteInstruction, callsiteMethod)
+    undo { callGraph.addCallsite(callsite) }
+
+    // Inlining a method body can render some code unreachable, see example above in this method.
+    unreachableCodeEliminated -= callsiteMethod
   }
 
   /**
-   * Check whether an inling can be performed. Parmeters are described in method [[inline]].
+   * Check whether an inlining can be performed. This method performs tests that don't change even
+   * if the body of the callee is changed by the inliner / optimizer, so it can be used early
+   * (when looking at the call graph and collecting inline requests for the program).
+   *
+   * The tests that inspect the callee's instructions are implemented in method `canInlineCallsite`,
+   * which is queried when performing an inline.
+   *
    * @return `Some(message)` if inlining cannot be performed, `None` otherwise
    */
-  def canInline(callsiteInstruction: MethodInsnNode, callsiteStackHeight: Int, callsiteMethod: MethodNode, callsiteClass: ClassBType,
-                callee: MethodNode, calleeDeclarationClass: ClassBType): Option[CannotInlineWarning] = {
+  def earlyCanInlineCheck(callsite: Callsite): Option[CannotInlineWarning] = {
+    import callsite.{callsiteMethod, callsiteClass}
+    val Right(callsiteCallee) = callsite.callee
+    import callsiteCallee.{callee, calleeDeclarationClass}
+
+    if (isSynchronizedMethod(callee)) {
+      // Could be done by locking on the receiver, wrapping the inlined code in a try and unlocking
+      // in finally. But it's probably not worth the effort, scala never emits synchronized methods.
+      Some(SynchronizedMethod(calleeDeclarationClass.internalName, callee.name, callee.desc, callsite.isInlineAnnotated))
+    } else if (isStrictfpMethod(callsiteMethod) != isStrictfpMethod(callee)) {
+      Some(StrictfpMismatch(
+        calleeDeclarationClass.internalName, callee.name, callee.desc, callsite.isInlineAnnotated,
+        callsiteClass.internalName, callsiteMethod.name, callsiteMethod.desc))
+    } else
+      None
+  }
+
+  /**
+   * Check whether the body of the callee contains any instructions that prevent the callsite from
+   * being inlined. See also method `earlyCanInlineCheck`.
+   *
+   * The result of this check depends on changes to the callee method's body. For example, if the
+   * callee initially invokes a private method, it cannot be inlined into a different class. If the
+   * private method is inlined into the callee, inlining the callee becomes possible. Therefore
+   * we don't query it while traversing the call graph and selecting callsites to inline - it might
+   * rule out callsites that can be inlined just fine.
+   *
+   * Returns
+   *  - `None` if the callsite can be inlined
+   *  - `Some((message, Nil))` if there was an issue performing the access checks, for example
+   *    because of a missing classfile
+   *  - `Some((message, instructions))` if inlining `instructions` into the callsite method would
+   *    cause an IllegalAccessError
+   */
+  def canInlineCallsite(callsite: Callsite): Option[(CannotInlineWarning, List[AbstractInsnNode])] = {
+    import callsite.{callsiteInstruction, callsiteMethod, callsiteClass, callsiteStackHeight}
+    val Right(callsiteCallee) = callsite.callee
+    import callsiteCallee.{callee, calleeDeclarationClass}
 
     def calleeDesc = s"${callee.name} of type ${callee.desc} in ${calleeDeclarationClass.internalName}"
     def methodMismatch = s"Wrong method node for inlining ${textify(callsiteInstruction)}: $calleeDesc"
@@ -511,31 +649,30 @@ class Inliner[BT <: BTypes](val btypes: BT) {
     }
 
     if (codeSizeOKForInlining(callsiteMethod, callee)) {
-      Some(ResultingMethodTooLarge(
-        calleeDeclarationClass.internalName, callee.name, callee.desc,
-        callsiteClass.internalName, callsiteMethod.name, callsiteMethod.desc))
-    } else if (isSynchronizedMethod(callee)) {
-      // Could be done by locking on the receiver, wrapping the inlined code in a try and unlocking
-      // in finally. But it's probably not worth the effort, scala never emits synchronized methods.
-      Some(SynchronizedMethod(calleeDeclarationClass.internalName, callee.name, callee.desc))
-    } else if (isStrictfpMethod(callsiteMethod) != isStrictfpMethod(callee)) {
-      Some(StrictfpMismatch(
-        calleeDeclarationClass.internalName, callee.name, callee.desc,
-        callsiteClass.internalName, callsiteMethod.name, callsiteMethod.desc))
+      val warning = ResultingMethodTooLarge(
+        calleeDeclarationClass.internalName, callee.name, callee.desc, callsite.isInlineAnnotated,
+        callsiteClass.internalName, callsiteMethod.name, callsiteMethod.desc)
+      Some((warning, Nil))
     } else if (!callee.tryCatchBlocks.isEmpty && stackHasNonParameters) {
-      Some(MethodWithHandlerCalledOnNonEmptyStack(
-        calleeDeclarationClass.internalName, callee.name, callee.desc,
-        callsiteClass.internalName, callsiteMethod.name, callsiteMethod.desc))
-    } else findIllegalAccess(callee.instructions, calleeDeclarationClass, callsiteClass) map {
-      case (illegalAccessIns, None) =>
-        IllegalAccessInstruction(
-          calleeDeclarationClass.internalName, callee.name, callee.desc,
-          callsiteClass.internalName, illegalAccessIns)
-
-      case (illegalAccessIns, Some(warning)) =>
-        IllegalAccessCheckFailed(
-          calleeDeclarationClass.internalName, callee.name, callee.desc,
-          callsiteClass.internalName, illegalAccessIns, warning)
+      val warning = MethodWithHandlerCalledOnNonEmptyStack(
+        calleeDeclarationClass.internalName, callee.name, callee.desc, callsite.isInlineAnnotated,
+        callsiteClass.internalName, callsiteMethod.name, callsiteMethod.desc)
+      Some((warning, Nil))
+    } else findIllegalAccess(callee.instructions, calleeDeclarationClass, callsiteClass) match {
+      case Right(Nil) =>
+        None
+
+      case Right(illegalAccessInsns) =>
+        val warning = IllegalAccessInstruction(
+          calleeDeclarationClass.internalName, callee.name, callee.desc, callsite.isInlineAnnotated,
+          callsiteClass.internalName, illegalAccessInsns.head)
+        Some((warning, illegalAccessInsns))
+
+      case Left((illegalAccessIns, cause)) =>
+        val warning = IllegalAccessCheckFailed(
+          calleeDeclarationClass.internalName, callee.name, callee.desc, callsite.isInlineAnnotated,
+          callsiteClass.internalName, illegalAccessIns, cause)
+        Some((warning, Nil))
     }
   }
 
@@ -545,7 +682,7 @@ class Inliner[BT <: BTypes](val btypes: BT) {
    *  (A2) C and D are members of the same run-time package
    */
   def classIsAccessible(accessed: BType, from: ClassBType): Either[OptimizerWarning, Boolean] = (accessed: @unchecked) match {
-    // TODO: A2 requires "same run-time package", which seems to be package + classloader (JMVS 5.3.). is the below ok?
+    // TODO: A2 requires "same run-time package", which seems to be package + classloader (JVMS 5.3.). is the below ok?
     case c: ClassBType     => c.isPublic.map(_ || c.packageInternalName == from.packageInternalName)
     case a: ArrayBType     => classIsAccessible(a.elementType, from)
     case _: PrimitiveBType => Right(true)
@@ -587,7 +724,7 @@ class Inliner[BT <: BTypes](val btypes: BT) {
    * type from there (https://github.com/scala-opt/scala/issues/13).
    */
   def memberIsAccessible(memberFlags: Int, memberDeclClass: ClassBType, memberRefClass: ClassBType, from: ClassBType): Either[OptimizerWarning, Boolean] = {
-    // TODO: B3 requires "same run-time package", which seems to be package + classloader (JMVS 5.3.). is the below ok?
+    // TODO: B3 requires "same run-time package", which seems to be package + classloader (JVMS 5.3.). is the below ok?
     def samePackageAsDestination = memberDeclClass.packageInternalName == from.packageInternalName
     def targetObjectConformsToDestinationClass = false // needs type propagation analysis, see above
 
@@ -624,13 +761,14 @@ class Inliner[BT <: BTypes](val btypes: BT) {
   }
 
   /**
-   * Returns the first instruction in the `instructions` list that would cause a
-   * [[java.lang.IllegalAccessError]] when inlined into the `destinationClass`.
-   *
-   * If validity of some instruction could not be checked because an error occurred, the instruction
-   * is returned together with a warning message that describes the problem.
+   * Returns
+   *   - `Right(Nil)` if all instructions can be safely inlined
+   *   - `Right(insns)` where `insns` are all the instructions that would cause a
+   *     [[java.lang.IllegalAccessError]] when inlined into the `destinationClass`
+   *   - `Left((insn, warning))` for the first instruction `insn` whose validity could not be
+   *     checked because an error occurred; no further instructions are checked after it
    */
-  def findIllegalAccess(instructions: InsnList, calleeDeclarationClass: ClassBType, destinationClass: ClassBType): Option[(AbstractInsnNode, Option[OptimizerWarning])] = {
+  def findIllegalAccess(instructions: InsnList, calleeDeclarationClass: ClassBType, destinationClass: ClassBType): Either[(AbstractInsnNode, OptimizerWarning), List[AbstractInsnNode]] = {
     /**
      * Check if `instruction` can be transplanted to `destinationClass`.
      *
@@ -759,17 +897,15 @@ class Inliner[BT <: BTypes](val btypes: BT) {
     }
 
     val it = instructions.iterator.asScala
-    @tailrec def find: Option[(AbstractInsnNode, Option[OptimizerWarning])] = {
-      if (!it.hasNext) None // all instructions are legal
-      else {
-        val i = it.next()
-        isLegal(i) match {
-          case Left(warning) => Some((i, Some(warning))) // checking isLegal for i failed
-          case Right(false)  => Some((i, None))          // an illegal instruction was found
-          case _             => find
-        }
+    val illegalAccess = mutable.ListBuffer.empty[AbstractInsnNode]
+    while (it.hasNext) {
+      val i = it.next()
+      isLegal(i) match {
+        case Left(warning) => return Left((i, warning)) // checking isLegal for i failed
+        case Right(false)  => illegalAccess += i        // an illegal instruction was found
+        case _ =>
       }
     }
-    find
+    Right(illegalAccess.toList)
   }
 }
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala
new file mode 100644
index 0000000000..63360e17ff
--- /dev/null
+++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala
@@ -0,0 +1,339 @@
+/* NSC -- new Scala compiler
+ * Copyright 2005-2014 LAMP/EPFL
+ * @author  Martin Odersky
+ */
+
+package scala.tools.nsc
+package backend.jvm
+package opt
+
+import scala.annotation.tailrec
+import scala.collection.JavaConverters._
+import scala.tools.asm.Opcodes
+import scala.tools.asm.tree.{AbstractInsnNode, MethodInsnNode, MethodNode}
+import scala.tools.nsc.backend.jvm.BTypes.InternalName
+import scala.tools.nsc.backend.jvm.BackendReporting.{CalleeNotFinal, OptimizerWarning}
+
+class InlinerHeuristics[BT <: BTypes](val bTypes: BT) {
+  import bTypes._
+  import callGraph._
+
+  final case class InlineRequest(callsite: Callsite, post: List[InlineRequest], reason: String) {
+    // invariant: every post-inline request targets a callsite located in the callee of the main callsite
+    post.foreach(pr => assert(pr.callsite.callsiteMethod == callsite.callee.get.callee, s"Callsite method mismatch: main $callsite - post ${pr.callsite}"))
+  }
+
+  def canInlineFromSource(sourceFilePath: Option[String]) = compilerSettings.optInlineGlobal || sourceFilePath.nonEmpty
+
+  /**
+   * Collects, per method under compilation, the inline requests chosen by the heuristics.
+   * Requests may form cycles; the inliner is responsible for dropping requests to break them.
+   */
+  def selectCallsitesForInlining: Map[MethodNode, Set[InlineRequest]] = {
+    // The call graph may also hold callsites of classes that were only parsed from the
+    // classpath. Inline requests are wanted solely for code being compiled, so the methods
+    // are enumerated from the byteCodeRepository's compilingClasses instead of from the
+    // call graph.
+    val compilingMethods = for {
+      (classNode, _) <- byteCodeRepository.compilingClasses.valuesIterator
+      methodNode     <- classNode.methods.iterator.asScala
+    } yield methodNode
+
+    val requestsPerMethod = compilingMethods map { method =>
+      var selected = Set.empty[InlineRequest]
+      for (site <- callGraph.callsites(method).valuesIterator) site match {
+        case Callsite(_, _, _, Right(Callee(callee, _, _, _, _, _, _, calleeWarning)), _, _, _, pos, _, _) =>
+          inlineRequest(site, selected) match {
+            case Some(Right(request)) => selected = selected + request
+
+            case Some(Left(cannotInline)) =>
+              if (cannotInline.emitWarning(compilerSettings))
+                backendReporting.inlinerWarning(site.callsitePosition, cannotInline.toString)
+
+            case None =>
+              for (cw <- calleeWarning if cw.emitWarning(compilerSettings))
+                backendReporting.inlinerWarning(pos, s"there was a problem determining if method ${callee.name} can be inlined: \n"+ cw)
+          }
+
+        case Callsite(ins, _, _, Left(warning), _, _, _, pos, _, _) =>
+          if (warning.emitWarning(compilerSettings))
+            backendReporting.inlinerWarning(pos, s"failed to determine if ${ins.name} should be inlined:\n$warning")
+      }
+      (method, selected)
+    }
+    requestsPerMethod.filter(_._2.nonEmpty).toMap
+  }
+
+  private def isTraitStaticSuperAccessorName(s: String) = s.nonEmpty && s.last == '$'
+  private def traitStaticSuperAccessorName(s: String) = s"$s$$"
+
+  private def isTraitSuperAccessor(method: MethodNode, owner: ClassBType): Boolean = {
+    owner.isInterface.fold(_ => false, identity) && BytecodeUtils.isStaticMethod(method) && isTraitStaticSuperAccessorName(method.name)
+  }
+
+  private def findSingleCall(method: MethodNode, such: MethodInsnNode => Boolean): Option[MethodInsnNode] = {
+    // First method invocation at or after `from`; `null` if the instruction list ends before one
+    // is found. Only `MethodInsnNode`s count as invocations here.
+    @tailrec def nextInvoke(from: AbstractInsnNode): MethodInsnNode = from match {
+      case null                 => null
+      case call: MethodInsnNode => call
+      case other                => nextInvoke(other.getNext)
+    }
+
+    // The method qualifies only if its first invocation satisfies `such` and nothing is invoked
+    // after it; `such` is tested before the remaining instructions are scanned.
+    val firstCall = nextInvoke(method.instructions.getFirst)
+    if (firstCall == null) None
+    else if (such(firstCall) && nextInvoke(firstCall.getNext) == null) Some(firstCall)
+    else None
+  }
+  private def superAccessorInvocation(method: MethodNode): Option[MethodInsnNode] =
+    findSingleCall(method, call => call.itf && call.getOpcode == Opcodes.INVOKESTATIC && isTraitStaticSuperAccessorName(call.name))
+
+  private def isMixinForwarder(method: MethodNode, owner: ClassBType): Boolean = {
+    // A mixin forwarder is a non-static method of a class (not an interface) whose single method
+    // invocation is the static super accessor carrying the forwarder's own name plus `$`.
+    def ownerIsClass = owner.isInterface.fold(_ => false, itf => !itf)
+    def isInstanceMethod = !BytecodeUtils.isStaticMethod(method)
+    def callsOwnAccessor = superAccessorInvocation(method).exists(_.name == traitStaticSuperAccessorName(method.name))
+    ownerIsClass && isInstanceMethod && callsOwnAccessor
+  }
+
+  private def isTraitSuperAccessorOrMixinForwarder(method: MethodNode, owner: ClassBType): Boolean = {
+    if (isTraitSuperAccessor(method, owner)) true else isMixinForwarder(method, owner)
+  }
+
+
+  /**
+   * Returns the inline request for a callsite if the callsite should be inlined according to the
+   * current heuristics (`-Yopt-inline-heuristics`: "everything", "at-inline-annotated", "default").
+   *
+   * The resulting inline request may contain post-inlining requests of callsites that in turn are
+   * also selected as individual inlining requests. `selectedRequestsForCallee` is currently unused.
+   *
+   * @return `None` if this callsite should not be inlined according to the active heuristic
+   *         `Some(Left)` if the callsite cannot be inlined (for example because that would cause
+   *           an IllegalAccessError) but should be according to the heuristic
+   *           TODO: what if a downstream inline request would cause an IAE and we don't create an
+   *           InlineRequest for the original callsite? new subclass of OptimizerWarning.
+   *         `Some(Right)` if the callsite should be and can be inlined
+   */
+  def inlineRequest(callsite: Callsite, selectedRequestsForCallee: Set[InlineRequest]): Option[Either[OptimizerWarning, InlineRequest]] = {
+    def requestIfCanInline(callsite: Callsite, reason: String): Option[Either[OptimizerWarning, InlineRequest]] = {
+      val callee = callsite.callee.get
+      if (!callee.safeToInline) {
+        if (callsite.isInlineAnnotated && callee.canInlineFromSource) {
+          // By default, we only emit inliner warnings for methods annotated @inline. However, we don't
+          // want to be unnecessarily noisy with `-opt-warnings:_`: for example, the inliner heuristic
+          // would attempt to inline `Function1.apply$sp$II`, as it's higher-order (the receiver is
+          // a function), and it's concrete (forwards to `apply`). But because it's non-final, it cannot
+          // be inlined. So we only create warnings here for methods annotated @inline.
+          Some(Left(CalleeNotFinal(
+            callee.calleeDeclarationClass.internalName,
+            callee.callee.name,
+            callee.callee.desc,
+            callsite.isInlineAnnotated)))
+        } else None
+      } else inliner.earlyCanInlineCheck(callsite) match {
+        case Some(w) => Some(Left(w))
+        case None =>
+          val postInlineRequest: List[InlineRequest] = {
+            val postCall =
+              if (isTraitSuperAccessor(callee.callee, callee.calleeDeclarationClass)) {
+                // scala-dev#259: when inlining a trait super accessor, also inline the callsite to the default method
+                val implName = callee.callee.name.dropRight(1) // accessor name without its trailing `$`
+                findSingleCall(callee.callee, mi => mi.itf && mi.getOpcode == Opcodes.INVOKESPECIAL && mi.name == implName)
+              } else {
+                // scala-dev#259: when inlining a mixin forwarder, also inline the callsite to the static super accessor
+                superAccessorInvocation(callee.callee)
+              }
+            postCall.flatMap(call => {
+              callGraph.addIfMissing(callee.callee, callee.calleeDeclarationClass)
+              val maybeCallsite = callGraph.findCallSite(callee.callee, call)
+              maybeCallsite.flatMap(requestIfCanInline(_, reason).flatMap(_.right.toOption)) // a warning for the post callsite is dropped
+            }).toList
+          }
+          Some(Right(InlineRequest(callsite, postInlineRequest, reason)))
+      }
+    }
+
+    // scala-dev#259: don't inline into static accessors and mixin forwarders
+    if (isTraitSuperAccessorOrMixinForwarder(callsite.callsiteMethod, callsite.callsiteClass)) None
+    else {
+      val callee = callsite.callee.get
+      compilerSettings.YoptInlineHeuristics.value match {
+        case "everything" =>
+          // in every branch below, `reason` is null unless `YoptLogInline` was set by the user
+          val reason = if (compilerSettings.YoptLogInline.isSetByUser) "the inline strategy is \"everything\"" else null
+          requestIfCanInline(callsite, reason)
+
+        case "at-inline-annotated" =>
+          def reason = if (!compilerSettings.YoptLogInline.isSetByUser) null else {
+            val what = if (callee.annotatedInline) "callee" else "callsite"
+            s"the $what is annotated `@inline`"
+          }
+          if (callsite.isInlineAnnotated && !callsite.isNoInlineAnnotated) requestIfCanInline(callsite, reason)
+          else None
+
+        case "default" =>
+          def reason = if (!compilerSettings.YoptLogInline.isSetByUser) null else {
+            if (callsite.isInlineAnnotated) {
+              val what = if (callee.annotatedInline) "callee" else "callsite"
+              s"the $what is annotated `@inline`"
+            } else {
+              val paramNames = Option(callee.callee.parameters).map(_.asScala.map(_.name).toVector)
+              def param(i: Int) = {
+                def syn = s"<param $i>" // placeholder used when no name is available for parameter i
+                paramNames.fold(syn)(v => v.applyOrElse(i, (_: Int) => syn))
+              }
+              def samInfo(i: Int, sam: String, arg: String) = s"the argument for parameter (${param(i)}: $sam) is a $arg"
+              val argInfos = for ((i, sam) <- callee.samParamTypes; info <- callsite.argInfos.get(i)) yield {
+                val argKind = info match {
+                  case FunctionLiteral => "function literal"
+                  case ForwardedParam(_) => "parameter of the callsite method"
+                }
+                samInfo(i, sam.internalName.split('/').last, argKind)
+              }
+              s"the callee is a higher-order method, ${argInfos.mkString(", ")}"
+            }
+          }
+          // higher-order callee for which argument info is known for at least one SAM-typed parameter
+          def shouldInlineHO = callee.samParamTypes.nonEmpty && (callee.samParamTypes exists {
+            case (index, _) => callsite.argInfos.contains(index)
+          })
+          if (!callsite.isNoInlineAnnotated && (callsite.isInlineAnnotated || shouldInlineHO)) requestIfCanInline(callsite, reason)
+          else None
+      }
+    }
+  }
+
+  /*
+  // Script used to generate the entries of `javaSams` below, run using http://lihaoyi.github.io/Ammonite/
+
+  load.ivy("com.google.guava" % "guava" % "18.0")
+  val javaUtilFunctionClasses = {
+    val rt = System.getProperty("sun.boot.class.path").split(":").find(_.endsWith("lib/rt.jar")).get
+    val u = new java.io.File(rt).toURL
+    val l = new java.net.URLClassLoader(Array(u))
+    val cp = com.google.common.reflect.ClassPath.from(l)
+    cp.getTopLevelClasses("java.util.function").toArray.map(_.toString).toList
+  }
+
+  // found using IntelliJ's "Find Usages" on the @FunctionalInterface annotation
+  val otherClasses = List(
+    "com.sun.javafx.css.parser.Recognizer",
+    "java.awt.KeyEventDispatcher",
+    "java.awt.KeyEventPostProcessor",
+    "java.io.FileFilter",
+    "java.io.FilenameFilter",
+    "java.lang.Runnable",
+    "java.lang.Thread$UncaughtExceptionHandler",
+    "java.nio.file.DirectoryStream$Filter",
+    "java.nio.file.PathMatcher",
+    "java.time.temporal.TemporalAdjuster",
+    "java.time.temporal.TemporalQuery",
+    "java.util.Comparator",
+    "java.util.concurrent.Callable",
+    "java.util.logging.Filter",
+    "java.util.prefs.PreferenceChangeListener",
+    "javafx.animation.Interpolatable",
+    "javafx.beans.InvalidationListener",
+    "javafx.beans.value.ChangeListener",
+    "javafx.collections.ListChangeListener",
+    "javafx.collections.MapChangeListener",
+    "javafx.collections.SetChangeListener",
+    "javafx.event.EventHandler",
+    "javafx.util.Builder",
+    "javafx.util.BuilderFactory",
+    "javafx.util.Callback"
+  )
+
+  val allClasses = javaUtilFunctionClasses ::: otherClasses
+
+  load.ivy("org.ow2.asm" % "asm" % "5.0.4")
+  val classesAndSamNameDesc = allClasses.map(c => {
+    val cls = Class.forName(c)
+    val internalName = org.objectweb.asm.Type.getDescriptor(cls).drop(1).dropRight(1) // drop L and ;
+    val sams = cls.getMethods.filter(m => {
+      (m.getModifiers & java.lang.reflect.Modifier.ABSTRACT) != 0 &&
+      m.getName != "equals" // Comparator has an abstract override of "equals" for adding Javadoc
+    })
+    assert(sams.size == 1, internalName + sams.map(_.getName))
+    val sam = sams.head
+    val samDesc = org.objectweb.asm.Type.getMethodDescriptor(sam)
+    (internalName, sam.getName, samDesc)
+  })
+  println(classesAndSamNameDesc map {
+    case (cls, nme, desc) => s"""("$cls", "$nme$desc")"""
+  } mkString ("", ",\n", "\n"))
+  */
+  private val javaSams: Map[String, String] = Map(
+    "java/util/function/BiConsumer" -> "accept(Ljava/lang/Object;Ljava/lang/Object;)V",
+    "java/util/function/BiFunction" -> "apply(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;",
+    "java/util/function/BiPredicate" -> "test(Ljava/lang/Object;Ljava/lang/Object;)Z",
+    "java/util/function/BinaryOperator" -> "apply(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;",
+    "java/util/function/BooleanSupplier" -> "getAsBoolean()Z",
+    "java/util/function/Consumer" -> "accept(Ljava/lang/Object;)V",
+    "java/util/function/DoubleBinaryOperator" -> "applyAsDouble(DD)D",
+    "java/util/function/DoubleConsumer" -> "accept(D)V",
+    "java/util/function/DoubleFunction" -> "apply(D)Ljava/lang/Object;",
+    "java/util/function/DoublePredicate" -> "test(D)Z",
+    "java/util/function/DoubleSupplier" -> "getAsDouble()D",
+    "java/util/function/DoubleToIntFunction" -> "applyAsInt(D)I",
+    "java/util/function/DoubleToLongFunction" -> "applyAsLong(D)J",
+    "java/util/function/DoubleUnaryOperator" -> "applyAsDouble(D)D",
+    "java/util/function/Function" -> "apply(Ljava/lang/Object;)Ljava/lang/Object;",
+    "java/util/function/IntBinaryOperator" -> "applyAsInt(II)I",
+    "java/util/function/IntConsumer" -> "accept(I)V",
+    "java/util/function/IntFunction" -> "apply(I)Ljava/lang/Object;",
+    "java/util/function/IntPredicate" -> "test(I)Z",
+    "java/util/function/IntSupplier" -> "getAsInt()I",
+    "java/util/function/IntToDoubleFunction" -> "applyAsDouble(I)D",
+    "java/util/function/IntToLongFunction" -> "applyAsLong(I)J",
+    "java/util/function/IntUnaryOperator" -> "applyAsInt(I)I",
+    "java/util/function/LongBinaryOperator" -> "applyAsLong(JJ)J",
+    "java/util/function/LongConsumer" -> "accept(J)V",
+    "java/util/function/LongFunction" -> "apply(J)Ljava/lang/Object;",
+    "java/util/function/LongPredicate" -> "test(J)Z",
+    "java/util/function/LongSupplier" -> "getAsLong()J",
+    "java/util/function/LongToDoubleFunction" -> "applyAsDouble(J)D",
+    "java/util/function/LongToIntFunction" -> "applyAsInt(J)I",
+    "java/util/function/LongUnaryOperator" -> "applyAsLong(J)J",
+    "java/util/function/ObjDoubleConsumer" -> "accept(Ljava/lang/Object;D)V",
+    "java/util/function/ObjIntConsumer" -> "accept(Ljava/lang/Object;I)V",
+    "java/util/function/ObjLongConsumer" -> "accept(Ljava/lang/Object;J)V",
+    "java/util/function/Predicate" -> "test(Ljava/lang/Object;)Z",
+    "java/util/function/Supplier" -> "get()Ljava/lang/Object;",
+    "java/util/function/ToDoubleBiFunction" -> "applyAsDouble(Ljava/lang/Object;Ljava/lang/Object;)D",
+    "java/util/function/ToDoubleFunction" -> "applyAsDouble(Ljava/lang/Object;)D",
+    "java/util/function/ToIntBiFunction" -> "applyAsInt(Ljava/lang/Object;Ljava/lang/Object;)I",
+    "java/util/function/ToIntFunction" -> "applyAsInt(Ljava/lang/Object;)I",
+    "java/util/function/ToLongBiFunction" -> "applyAsLong(Ljava/lang/Object;Ljava/lang/Object;)J",
+    "java/util/function/ToLongFunction" -> "applyAsLong(Ljava/lang/Object;)J",
+    "java/util/function/UnaryOperator" -> "apply(Ljava/lang/Object;)Ljava/lang/Object;",
+    "com/sun/javafx/css/parser/Recognizer" -> "recognize(I)Z",
+    "java/awt/KeyEventDispatcher" -> "dispatchKeyEvent(Ljava/awt/event/KeyEvent;)Z",
+    "java/awt/KeyEventPostProcessor" -> "postProcessKeyEvent(Ljava/awt/event/KeyEvent;)Z",
+    "java/io/FileFilter" -> "accept(Ljava/io/File;)Z",
+    "java/io/FilenameFilter" -> "accept(Ljava/io/File;Ljava/lang/String;)Z",
+    "java/lang/Runnable" -> "run()V",
+    "java/lang/Thread$UncaughtExceptionHandler" -> "uncaughtException(Ljava/lang/Thread;Ljava/lang/Throwable;)V",
+    "java/nio/file/DirectoryStream$Filter" -> "accept(Ljava/lang/Object;)Z",
+    "java/nio/file/PathMatcher" -> "matches(Ljava/nio/file/Path;)Z",
+    "java/time/temporal/TemporalAdjuster" -> "adjustInto(Ljava/time/temporal/Temporal;)Ljava/time/temporal/Temporal;",
+    "java/time/temporal/TemporalQuery" -> "queryFrom(Ljava/time/temporal/TemporalAccessor;)Ljava/lang/Object;",
+    "java/util/Comparator" -> "compare(Ljava/lang/Object;Ljava/lang/Object;)I",
+    "java/util/concurrent/Callable" -> "call()Ljava/lang/Object;",
+    "java/util/logging/Filter" -> "isLoggable(Ljava/util/logging/LogRecord;)Z",
+    "java/util/prefs/PreferenceChangeListener" -> "preferenceChange(Ljava/util/prefs/PreferenceChangeEvent;)V",
+    "javafx/animation/Interpolatable" -> "interpolate(Ljava/lang/Object;D)Ljava/lang/Object;",
+    "javafx/beans/InvalidationListener" -> "invalidated(Ljavafx/beans/Observable;)V",
+    "javafx/beans/value/ChangeListener" -> "changed(Ljavafx/beans/value/ObservableValue;Ljava/lang/Object;Ljava/lang/Object;)V",
+    "javafx/collections/ListChangeListener" -> "onChanged(Ljavafx/collections/ListChangeListener$Change;)V",
+    "javafx/collections/MapChangeListener" -> "onChanged(Ljavafx/collections/MapChangeListener$Change;)V",
+    "javafx/collections/SetChangeListener" -> "onChanged(Ljavafx/collections/SetChangeListener$Change;)V",
+    "javafx/event/EventHandler" -> "handle(Ljavafx/event/Event;)V",
+    "javafx/util/Builder" -> "build()Ljava/lang/Object;",
+    "javafx/util/BuilderFactory" -> "getBuilder(Ljava/lang/Class;)Ljavafx/util/Builder;",
+    "javafx/util/Callback" -> "call(Ljava/lang/Object;)Ljava/lang/Object;"
+  )
+  def javaSam(internalName: InternalName): Option[String] = javaSams get internalName
+}
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/InstructionResultSize.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/InstructionResultSize.scala
deleted file mode 100644
index 8d744f6d13..0000000000
--- a/src/compiler/scala/tools/nsc/backend/jvm/opt/InstructionResultSize.scala
+++ /dev/null
@@ -1,240 +0,0 @@
-package scala.tools.nsc.backend.jvm.opt
-
-import scala.annotation.switch
-import scala.tools.asm.{Handle, Type, Opcodes}
-import scala.tools.asm.tree._
-
-object InstructionResultSize {
-  import Opcodes._
-  def apply(instruction: AbstractInsnNode): Int = (instruction.getOpcode: @switch) match {
-    // The order of opcodes is (almost) the same as in Opcodes.java
-    case ACONST_NULL => 1
-
-    case ICONST_M1 |
-         ICONST_0 |
-         ICONST_1 |
-         ICONST_2 |
-         ICONST_3 |
-         ICONST_4 |
-         ICONST_5 => 1
-
-    case LCONST_0 |
-         LCONST_1 => 2
-
-    case FCONST_0 |
-         FCONST_1 |
-         FCONST_2 => 1
-
-    case DCONST_0 |
-         DCONST_1 => 2
-
-    case BIPUSH |
-         SIPUSH => 1
-
-    case LDC =>
-      instruction.asInstanceOf[LdcInsnNode].cst match {
-        case _: java.lang.Integer |
-             _: java.lang.Float |
-             _: String |
-             _: Type |
-             _: Handle => 1
-
-        case _: java.lang.Long |
-             _: java.lang.Double => 2
-      }
-
-    case ILOAD |
-         FLOAD |
-         ALOAD => 1
-
-    case LLOAD |
-         DLOAD => 2
-
-    case IALOAD |
-         FALOAD |
-         AALOAD |
-         BALOAD |
-         CALOAD |
-         SALOAD => 1
-
-    case LALOAD |
-         DALOAD => 2
-
-    case ISTORE |
-         LSTORE |
-         FSTORE |
-         DSTORE |
-         ASTORE => 0
-
-    case IASTORE |
-         LASTORE |
-         FASTORE |
-         DASTORE |
-         AASTORE |
-         BASTORE |
-         CASTORE |
-         SASTORE => 0
-
-    case POP |
-         POP2 => 0
-
-    case DUP |
-         DUP_X1 |
-         DUP_X2 |
-         DUP2 |
-         DUP2_X1 |
-         DUP2_X2 |
-         SWAP => throw new IllegalArgumentException("Can't compute the size of DUP/SWAP without knowing what's on stack top")
-
-    case IADD |
-         FADD => 1
-
-    case LADD |
-         DADD => 2
-
-    case ISUB |
-         FSUB => 1
-
-    case LSUB |
-         DSUB => 2
-
-    case IMUL |
-         FMUL => 1
-
-    case LMUL |
-         DMUL => 2
-
-    case IDIV |
-         FDIV => 1
-
-    case LDIV |
-         DDIV => 2
-
-    case IREM |
-         FREM => 1
-
-    case LREM |
-         DREM => 2
-
-    case INEG |
-         FNEG => 1
-
-    case LNEG |
-         DNEG => 2
-
-    case ISHL |
-         ISHR => 1
-
-    case LSHL |
-         LSHR => 2
-
-    case IUSHR => 1
-
-    case LUSHR => 2
-
-    case IAND |
-         IOR |
-         IXOR => 1
-
-    case LAND |
-         LOR |
-         LXOR => 2
-
-    case IINC => 1
-
-    case I2F |
-         L2I |
-         L2F |
-         F2I |
-         D2I |
-         D2F |
-         I2B |
-         I2C |
-         I2S => 1
-
-    case I2L |
-         I2D |
-         L2D |
-         F2L |
-         F2D |
-         D2L => 2
-
-    case LCMP |
-         FCMPL |
-         FCMPG |
-         DCMPL |
-         DCMPG => 1
-
-    case IFEQ |
-         IFNE |
-         IFLT |
-         IFGE |
-         IFGT |
-         IFLE => 0
-
-    case IF_ICMPEQ |
-         IF_ICMPNE |
-         IF_ICMPLT |
-         IF_ICMPGE |
-         IF_ICMPGT |
-         IF_ICMPLE |
-         IF_ACMPEQ |
-         IF_ACMPNE => 0
-
-    case GOTO => 0
-
-    case JSR => throw new IllegalArgumentException("Subroutines are not supported.")
-
-    case RET => 0
-
-    case TABLESWITCH |
-         LOOKUPSWITCH => 0
-
-    case IRETURN |
-         FRETURN |
-         ARETURN => 1
-
-    case LRETURN |
-         DRETURN => 2
-
-    case RETURN => 0
-
-    case GETSTATIC => Type.getType(instruction.asInstanceOf[FieldInsnNode].desc).getSize
-
-    case PUTSTATIC => 0
-
-    case GETFIELD => Type.getType(instruction.asInstanceOf[FieldInsnNode].desc).getSize
-
-    case PUTFIELD => 0
-
-    case INVOKEVIRTUAL |
-         INVOKESPECIAL |
-         INVOKESTATIC |
-         INVOKEINTERFACE =>
-      val desc = instruction.asInstanceOf[MethodInsnNode].desc
-      Type.getReturnType(desc).getSize
-
-    case INVOKEDYNAMIC =>
-      val desc = instruction.asInstanceOf[InvokeDynamicInsnNode].desc
-      Type.getReturnType(desc).getSize
-
-    case NEW => 1
-
-    case NEWARRAY |
-         ANEWARRAY |
-         ARRAYLENGTH => 1
-
-    case ATHROW => 0
-
-    case CHECKCAST |
-         INSTANCEOF => 1
-
-    case MONITORENTER |
-         MONITOREXIT => 0
-
-    case MULTIANEWARRAY => 1
-
-    case IFNULL |
-         IFNONNULL => 0
-  }
-}
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala
index 4132710a96..9c22b09cdd 100644
--- a/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala
+++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala
@@ -7,79 +7,180 @@ package scala.tools.nsc
 package backend.jvm
 package opt
 
-import scala.annotation.switch
-import scala.tools.asm.Opcodes
-import scala.tools.asm.tree.analysis.{Analyzer, BasicInterpreter}
+import scala.annotation.{tailrec, switch}
+
+import scala.tools.asm.Type
+import scala.tools.asm.tree.analysis.Frame
+import scala.tools.asm.Opcodes._
 import scala.tools.asm.tree._
-import scala.collection.convert.decorateAsScala._
+import scala.collection.mutable
+import scala.collection.JavaConverters._
 import scala.tools.nsc.backend.jvm.BTypes.InternalName
+import scala.tools.nsc.backend.jvm.analysis._
 import scala.tools.nsc.backend.jvm.opt.BytecodeUtils._
 
 /**
- * Optimizations within a single method.
+ * Optimizations within a single method. Certain optimizations enable others, for example removing
+ * unreachable code can render a `try` block empty and enable removeEmptyExceptionHandlers. The
+ * latter in turn enables more unreachable code to be eliminated (the `catch` block), so there is
+ * a cyclic dependency. Optimizations that depend on each other are therefore executed in a loop
+ * until reaching a fixpoint.
+ *
+ * The optimizations marked UPSTREAM enable optimizations that were already executed, so they cause
+ * another iteration in the fixpoint loop.
+ *
+ * nullness optimizations: rewrite null-checking branches to GOTO if nullness is known
+ *   + enables downstream
+ *     - unreachable code (null / non-null branch becomes unreachable)
+ *     - box-unbox elimination (may render an escaping consumer of a box unreachable)
+ *     - stale stores (aload x is replaced by aconst_null if it's known null)
+ *     - simplify jumps (replaces conditional jumps by goto, so may enable goto chains)
+ *
+ * unreachable code / DCE (removes instructions of basic blocks to which there is no branch)
+ *   + enables downstream:
+ *     - stale stores (loads may be eliminated, removing consumers of a store)
+ *     - empty handlers (try blocks may become empty)
+ *     - simplify jumps (goto l; [dead code]; l: ..) => remove goto
+ *     - stale local variable descriptors
+ *     - (not box-unbox, which is implemented using prod-cons, so it doesn't consider dead code)
+ *
+ *   note that eliminating empty handlers and stale local variable descriptors is required for
+ *   correctness, see the comment in the body of `methodOptimizations`.
+ *
+ * box-unbox elimination (eliminates box-unbox pairs within the same method)
+ *   + enables UPSTREAM:
+ *     - nullness optimizations (a box extraction operation (unknown nullness) may be rewritten to
+ *       a read of a non-null local. example in doc comment of box-unbox implementation)
+ *     - further box-unbox elimination (e.g. an Integer stored in a Tuple; eliminating the tuple may
+ *       enable eliminating the Integer)
+ *   + enables downstream:
+ *     - copy propagation (new locals are introduced, may be aliases of existing)
+ *     - stale stores (multi-value boxes where not all values are used)
+ *     - redundant casts (`("a", "b")._1`: the generic `_1` method returns `Object`, a cast
+ *       to String is added. The cast is redundant after eliminating the tuple.)
+ *     - empty local variable descriptors (local variables that were holding the box may become unused)
+ *
+ * copy propagation (replaces LOAD n by LOAD m for the smallest m that is an alias of n)
+ *   + enables downstream:
+ *     - stale stores (a stored value may not be loaded anymore)
+ *     - store-load pairs (a load n may now be right after a store n)
+ *   + NOTE: copy propagation runs in the first fixpoint loop iteration, and again only after
+ *     box-unbox elimination changed the method; no other optimization enables further copy prop.
+ *     we still run it as part of the loop because it requires unreachable code to be eliminated.
+ *
+ * stale stores (replace STORE by POP)
+ *   + enables downstream:
+ *     - push-pop (the new pop may be the single consumer for an instruction)
+ *
+ * redundant casts: eliminates casts that are statically known to succeed (uses type propagation)
+ *   + enables UPSTREAM:
+ *     - box-unbox elimination (a removed checkcast may be a box consumer)
+ *   + enables downstream:
+ *     - push-pop for closure allocation elimination (every indyLambda is followed by a checkcast, see SI-9540)
+ *
+ * push-pop (when a POP is the only consumer of a value, remove the POP and its producer)
+ *   + enables UPSTREAM:
+ *     - stale stores (if a LOAD is removed, a corresponding STORE may become stale)
+ *     - box-unbox elimination (push-pop may eliminate a closure allocation, rendering a captured
+ *       box non-escaping)
+ *   + enables downstream:
+ *     - store-load pairs (a variable may become non-live)
+ *     - stale handlers (push-pop removes code)
+ *     - simplify jumps (push-pop removes code)
+ *
+ * store-load pairs (remove `STORE x; LOAD x` if x is otherwise not used in the method)
+ *   + enables downstream:
+ *     - empty handlers (code is removed, a try block may become empty)
+ *     - simplify jumps (code is removed, a goto may become redundant for example)
+ *     - stale local variable descriptors
  *
- * unreachable code
- *   - removes instructions of basic blocks to which no branch instruction points
- *   + enables eliminating some exception handlers and local variable descriptors
- *     > eliminating them is required for correctness, as explained in `removeUnreachableCode`
+ * empty handlers (removes exception handlers whose try block is empty)
+ *   + enables UPSTREAM:
+ *     - unreachable code (catch block becomes unreachable)
+ *     - box-unbox (a box may escape in an operation in a dead handler)
+ *   + enables downstream:
+ *     - simplify jumps
  *
- * empty exception handlers
- *   - removes exception handlers whose try block is empty
- *   + eliminating a handler where the try block is empty and reachable will turn the catch block
- *     unreachable. in this case "unreachable code" is invoked recursively until reaching a fixpoint.
- *     > for try blocks that are unreachable, "unreachable code" removes also the instructions of the
- *       catch block, and the recursive invocation is not necessary.
+ * simplify jumps (various, like `GOTO l; l: ...`, see doc comments of individual optimizations)
+ *   + enables UPSTREAM:
+ *     - unreachable code (`GOTO a; a: GOTO b; b: ...`, the first jump is changed to `GOTO b`, the second becomes unreachable)
+ *     - store-load pairs (a `GOTO l; l: ...` is removed between store and load)
+ *     - push-pop (`IFNULL l; l: ...` is replaced by `POP`)
  *
- * simplify jumps
- *   - various simplifications, see doc comments of individual optimizations
- *   + changing or eliminating jumps may render some code unreachable, therefore "simplify jumps" is
- *     executed in a loop with "unreachable code"
  *
- * empty local variable descriptors
- *   - removes entries from the local variable table where the variable is not actually used
- *   + enables eliminating labels that the entry points to (if they are not otherwise referenced)
+ * The following cleanup optimizations don't enable any upstream optimizations, so they can be
+ * executed once at the end, when the above optimizations reach a fixpoint.
  *
- * empty line numbers
- *   - eliminates line number nodes that describe no executable instructions
- *   + enables eliminating the label of the line number node (if it's not otherwise referenced)
  *
- * stale labels
- *   - eliminate labels that are not referenced, merge sequences of label definitions.
+ * empty local variable descriptors (removes unused variables from the local variable table)
+ *   + enables downstream:
+ *     - stale labels (labels that the entry points to, if not otherwise referenced)
+ *
+ * empty line numbers (eliminates line number nodes that describe no executable instructions)
+ *   + enables downstream:
+ *     - stale labels (label of the line number node, if not otherwise referenced)
+ *
+ * stale labels (eliminate labels that are not referenced, merge sequences of label definitions)
+ *
+ *
+ * Note on a method's maxLocals / maxStack: the backend only uses those values for running
+ * Analyzers. The values can be conservative approximations: if an optimization removes code and
+ * the maximal stack size is now smaller, the larger maxStack value will still work fine for
+ * running an Analyzer (just that frames allocate more space than required). The correct max
+ * values written to the bytecode are re-computed during classfile serialization.
+ * To keep things simpler, we don't update the max values in every optimization:
+ *   - we do it in `removeUnreachableCodeImpl`, because it's quite straightforward
+ *   - maxLocals is updated in `compactLocalVariables`, which runs at the end of method optimizations
+ *
+ *
+ * Note on updating the call graph: whenever an optimization eliminates a callsite or a closure
+ * instantiation, we eliminate the corresponding entry from the call graph.
  */
 class LocalOpt[BT <: BTypes](val btypes: BT) {
   import LocalOptImpls._
   import btypes._
+  import coreBTypes._
+  import backendUtils._
+
+  val boxUnbox = new BoxUnbox(btypes)
+  import boxUnbox._
+
+  val copyProp = new CopyProp(btypes)
+  import copyProp._
 
   /**
    * Remove unreachable code from a method.
    *
    * This implementation only removes instructions that are unreachable for an ASM analyzer /
    * interpreter. This ensures that future analyses will not produce `null` frames. The inliner
-   * and call graph builder depend on this property.
+   * depends on this property.
    *
    * @return A set containing the eliminated instructions
    */
-  def minimalRemoveUnreachableCode(method: MethodNode, ownerClassName: InternalName): Set[AbstractInsnNode] = {
-    if (method.instructions.size == 0) return Set.empty     // fast path for abstract methods
-    if (unreachableCodeEliminated(method)) return Set.empty // we know there is no unreachable code
+  def minimalRemoveUnreachableCode(method: MethodNode, ownerClassName: InternalName): Boolean = {
+    // In principle, for the inliner, a single removeUnreachableCodeImpl would be enough. But that
+    // would potentially leave behind stale handlers (empty try block) which is not legal in the
+    // classfile. So we run both removeUnreachableCodeImpl and removeEmptyExceptionHandlers.
+    if (method.instructions.size == 0) return false     // fast path for abstract methods
+    if (unreachableCodeEliminated(method)) return false // we know there is no unreachable code
+    if (!AsmAnalyzer.sizeOKForBasicValue(method)) return false // the method is too large for running an analyzer
 
     // For correctness, after removing unreachable code, we have to eliminate empty exception
     // handlers, see scaladoc of def methodOptimizations. Removing an live handler may render more
     // code unreachable and therefore requires running another round.
-    def removalRound(): Set[AbstractInsnNode] = {
-      val (removedInstructions, liveLabels) = removeUnreachableCodeImpl(method, ownerClassName)
-      val removedRecursively = if (removedInstructions.nonEmpty) {
+    def removalRound(): Boolean = {
+      val (insnsRemoved, liveLabels) = removeUnreachableCodeImpl(method, ownerClassName)
+      if (insnsRemoved) {
         val liveHandlerRemoved = removeEmptyExceptionHandlers(method).exists(h => liveLabels(h.start))
         if (liveHandlerRemoved) removalRound()
-        else Set.empty
-      } else Set.empty
-      removedInstructions ++ removedRecursively
+      }
+      insnsRemoved
     }
 
-    val removedInstructions = removalRound()
-    if (removedInstructions.nonEmpty) removeUnusedLocalVariableNodes(method)()
+    val changed = removalRound()
+    if (changed) removeUnusedLocalVariableNodes(method)()
     unreachableCodeEliminated += method
-    removedInstructions
+    changed
   }
 
   /**
@@ -90,21 +191,13 @@ class LocalOpt[BT <: BTypes](val btypes: BT) {
    * @return      `true` if unreachable code was eliminated in some method, `false` otherwise.
    */
   def methodOptimizations(clazz: ClassNode): Boolean = {
-    !compilerSettings.YoptNone && clazz.methods.asScala.foldLeft(false) {
+    !compilerSettings.optNone && clazz.methods.asScala.foldLeft(false) {
       case (changed, method) => methodOptimizations(method, clazz.name) || changed
     }
   }
 
   /**
-   * Remove unreachable code from a method.
-   *
-   * We rely on dead code elimination provided by the ASM framework, as described in the ASM User
-   * Guide (http://asm.ow2.org/index.html), Section 8.2.1. It runs a data flow analysis, which only
-   * computes Frame information for reachable instructions. Instructions for which no Frame data is
-   * available after the analysis are unreachable.
-   *
-   * Also simplifies branching instructions, removes unused local variable descriptors, empty
-   * exception handlers, unnecessary label declarations and empty line number nodes.
+   * Run method-level optimizations, see comment on class [[LocalOpt]].
    *
    * Returns `true` if the bytecode of `method` was changed.
    */
@@ -137,36 +230,151 @@ class LocalOpt[BT <: BTypes](val btypes: BT) {
     // This triggers "ClassFormatError: Illegal exception table range in class file C". Similar
     // for local variables in dead blocks. Maybe that's a bug in the ASM framework.
 
-    def removalRound(): Boolean = {
-      // unreachable-code, empty-handlers and simplify-jumps run until reaching a fixpoint (see doc on class LocalOpt)
-      val (codeRemoved, handlersRemoved, liveHandlerRemoved) = if (compilerSettings.YoptUnreachableCode) {
-        val (removedInstructions, liveLabels) = removeUnreachableCodeImpl(method, ownerClassName)
-        val removedHandlers = removeEmptyExceptionHandlers(method)
-        (removedInstructions.nonEmpty, removedHandlers.nonEmpty, removedHandlers.exists(h => liveLabels(h.start)))
-      } else {
-        (false, false, false)
+    var currentTrace: String = null
+    val methodPrefix = {val p = compilerSettings.YoptTrace.value; if (p == "_") "" else p }
+    val doTrace = compilerSettings.YoptTrace.isSetByUser && s"$ownerClassName.${method.name}".startsWith(methodPrefix)
+    def traceIfChanged(optName: String): Unit = if (doTrace) {
+      val after = AsmUtils.textify(method)
+      if (currentTrace != after) {
+        println(s"after $optName")
+        println(after)
       }
-
-      val jumpsChanged = if (compilerSettings.YoptSimplifyJumps) simplifyJumps(method) else false
-
-      // Eliminating live handlers and simplifying jump instructions may render more code
-      // unreachable, so we need to run another round.
-      if (liveHandlerRemoved || jumpsChanged) removalRound()
-
-      codeRemoved || handlersRemoved || jumpsChanged
+      currentTrace = after
     }
 
-    val codeHandlersOrJumpsChanged = removalRound()
+    /**
+     * Runs the optimizations that depend on each other in a loop until reaching a fixpoint. See
+     * comment in class [[LocalOpt]].
+     *
+     * Returns a pair of booleans (codeChanged, requireEliminateUnusedLocals).
+     */
+    def removalRound(
+        requestNullness: Boolean,
+        requestDCE: Boolean,
+        requestBoxUnbox: Boolean,
+        requestStaleStores: Boolean,
+        requestPushPop: Boolean,
+        requestStoreLoad: Boolean,
+        firstIteration: Boolean,
+        maxRecursion: Int = 10): (Boolean, Boolean) = {
+      if (maxRecursion == 0) return (false, false)
+
+      traceIfChanged("beforeMethodOpt")
+
+      // NULLNESS OPTIMIZATIONS
+      val runNullness = compilerSettings.optNullnessTracking && requestNullness
+      val nullnessOptChanged = runNullness && nullnessOptimizations(method, ownerClassName)
+      traceIfChanged("nullness")
+
+      // UNREACHABLE CODE
+      // Both AliasingAnalyzer (used in copyProp) and ProdConsAnalyzer (used in eliminateStaleStores,
+      // boxUnboxElimination) require not having unreachable instructions (null frames).
+      val runDCE = (compilerSettings.optUnreachableCode && (requestDCE || nullnessOptChanged)) ||
+        compilerSettings.optBoxUnbox ||
+        compilerSettings.optCopyPropagation
+      val (codeRemoved, liveLabels) = if (runDCE) removeUnreachableCodeImpl(method, ownerClassName) else (false, Set.empty[LabelNode])
+      traceIfChanged("dce")
+
+      // BOX-UNBOX
+      val runBoxUnbox = compilerSettings.optBoxUnbox && (requestBoxUnbox || nullnessOptChanged)
+      val boxUnboxChanged = runBoxUnbox && boxUnboxElimination(method, ownerClassName)
+      traceIfChanged("boxUnbox")
+
+      // COPY PROPAGATION
+      val runCopyProp = compilerSettings.optCopyPropagation && (firstIteration || boxUnboxChanged)
+      val copyPropChanged = runCopyProp && copyPropagation(method, ownerClassName)
+      traceIfChanged("copyProp")
+
+      // STALE STORES
+      val runStaleStores = compilerSettings.optCopyPropagation && (requestStaleStores || nullnessOptChanged || codeRemoved || boxUnboxChanged || copyPropChanged)
+      val storesRemoved = runStaleStores && eliminateStaleStores(method, ownerClassName)
+      traceIfChanged("staleStores")
+
+      // REDUNDANT CASTS
+      val runRedundantCasts = compilerSettings.optRedundantCasts && (firstIteration || boxUnboxChanged)
+      val castRemoved = runRedundantCasts && eliminateRedundantCasts(method, ownerClassName)
+      traceIfChanged("redundantCasts")
+
+      // PUSH-POP
+      val runPushPop = compilerSettings.optCopyPropagation && (requestPushPop || firstIteration || storesRemoved || castRemoved)
+      val pushPopRemoved = runPushPop && eliminatePushPop(method, ownerClassName)
+      traceIfChanged("pushPop")
+
+      // STORE-LOAD PAIRS
+      val runStoreLoad = compilerSettings.optCopyPropagation && (requestStoreLoad || boxUnboxChanged || copyPropChanged || pushPopRemoved)
+      val storeLoadRemoved = runStoreLoad && eliminateStoreLoad(method)
+      traceIfChanged("storeLoadPairs")
+
+      // STALE HANDLERS
+      val removedHandlers = if (runDCE) removeEmptyExceptionHandlers(method) else Set.empty[TryCatchBlockNode]
+      val handlersRemoved = removedHandlers.nonEmpty
+      val liveHandlerRemoved = removedHandlers.exists(h => liveLabels(h.start))
+      traceIfChanged("staleHandlers")
+
+      // SIMPLIFY JUMPS
+      // almost all of the above optimizations enable simplifying more jumps, so we just run it in every iteration
+      val runSimplifyJumps = compilerSettings.optSimplifyJumps
+      val jumpsChanged = runSimplifyJumps && simplifyJumps(method)
+      traceIfChanged("simplifyJumps")
+
+      // See doc comment in the beginning of this file (optimizations marked UPSTREAM)
+      val runNullnessAgain = boxUnboxChanged
+      val runDCEAgain = liveHandlerRemoved || jumpsChanged
+      val runBoxUnboxAgain = boxUnboxChanged || castRemoved || pushPopRemoved || liveHandlerRemoved
+      val runStaleStoresAgain = pushPopRemoved
+      val runPushPopAgain = jumpsChanged
+      val runStoreLoadAgain = jumpsChanged
+      val runAgain = runNullnessAgain || runDCEAgain || runBoxUnboxAgain || pushPopRemoved || runStaleStoresAgain || runPushPopAgain || runStoreLoadAgain
+
+      val downstreamRequireEliminateUnusedLocals = runAgain && removalRound(
+        requestNullness = runNullnessAgain,
+        requestDCE = runDCEAgain,
+        requestBoxUnbox = runBoxUnboxAgain,
+        requestStaleStores = runStaleStoresAgain,
+        requestPushPop = runPushPopAgain,
+        requestStoreLoad = runStoreLoadAgain,
+        firstIteration = false,
+        maxRecursion = maxRecursion - 1)._2
+
+      val requireEliminateUnusedLocals = downstreamRequireEliminateUnusedLocals ||
+        nullnessOptChanged || // nullness opt may eliminate stores / loads, rendering a local unused
+        codeRemoved ||        // see comment in method `methodOptimizations`
+        boxUnboxChanged ||    // box-unbox renders locals (holding boxes) unused
+        storesRemoved  ||
+        storeLoadRemoved ||
+        handlersRemoved
+
+      val codeChanged = nullnessOptChanged || codeRemoved || boxUnboxChanged || castRemoved || copyPropChanged || storesRemoved || pushPopRemoved || storeLoadRemoved || handlersRemoved || jumpsChanged
+      (codeChanged, requireEliminateUnusedLocals)
+    }
 
-    // (*) Removing stale local variable descriptors is required for correctness of unreachable-code
+    val (nullnessDceBoxesCastsCopypropPushpopOrJumpsChanged, requireEliminateUnusedLocals) = if (AsmAnalyzer.sizeOKForBasicValue(method)) {
+      // we run DCE even if the method is already in the `unreachableCodeEliminated` map: the DCE
+      // here is more thorough than `minimalRemoveUnreachableCode`, which runs before inlining.
+      val r = removalRound(
+        requestNullness = true,
+        requestDCE = true,
+        requestBoxUnbox = true,
+        requestStaleStores = true,
+        requestPushPop = true,
+        requestStoreLoad = true,
+        firstIteration = true)
+      if (compilerSettings.optUnreachableCode) unreachableCodeEliminated += method
+      r
+    } else (false, false)
+
+    // (*) Removing stale local variable descriptors is required for correctness, see comment in `methodOptimizations`
     val localsRemoved =
-      if (compilerSettings.YoptCompactLocals) compactLocalVariables(method) // also removes unused
-      else if (compilerSettings.YoptUnreachableCode) removeUnusedLocalVariableNodes(method)() // (*)
+      if (compilerSettings.optCompactLocals) compactLocalVariables(method) // also removes unused
+      else if (requireEliminateUnusedLocals) removeUnusedLocalVariableNodes(method)() // (*)
       else false
+    traceIfChanged("localVariables")
 
-    val lineNumbersRemoved = if (compilerSettings.YoptEmptyLineNumbers) removeEmptyLineNumbers(method) else false
+    val lineNumbersRemoved = if (compilerSettings.optUnreachableCode) removeEmptyLineNumbers(method) else false
+    traceIfChanged("lineNumbers")
 
-    val labelsRemoved = if (compilerSettings.YoptEmptyLabels) removeEmptyLabelNodes(method) else false
+    val labelsRemoved = if (compilerSettings.optUnreachableCode) removeEmptyLabelNodes(method) else false
+    traceIfChanged("labels")
 
     // assert that local variable annotations are empty (we don't emit them) - otherwise we'd have
     // to eliminate those covering an empty range, similar to removeUnusedLocalVariableNodes.
@@ -174,53 +382,198 @@ class LocalOpt[BT <: BTypes](val btypes: BT) {
     assert(nullOrEmpty(method.visibleLocalVariableAnnotations), method.visibleLocalVariableAnnotations)
     assert(nullOrEmpty(method.invisibleLocalVariableAnnotations), method.invisibleLocalVariableAnnotations)
 
-    unreachableCodeEliminated += method
-
-    codeHandlersOrJumpsChanged || localsRemoved || lineNumbersRemoved || labelsRemoved
+    nullnessDceBoxesCastsCopypropPushpopOrJumpsChanged || localsRemoved || lineNumbersRemoved || labelsRemoved
   }
 
-}
+  /**
+   * Apply various optimizations based on nullness analysis information.
+   *   - IFNULL / IFNONNULL are rewritten to GOTO if nullness is known
+   *   - IF_ACMPEQ / IF_ACMPNE are rewritten to GOTO if both references are known null, or if
+   *     one is known null and the other known not-null
+   *   - ALOAD is replaced by ACONST_NULL if the local is known to hold null
+   *   - ASTORE of null is removed if the local is known to hold null
+   *   - INSTANCEOF of null is replaced by `ICONST_0`
+   *   - scala.runtime.BoxesRunTime.unboxToX(null) is rewritten to a zero-value load
+   */
+  def nullnessOptimizations(method: MethodNode, ownerClassName: InternalName): Boolean = {
+    AsmAnalyzer.sizeOKForNullness(method) && {
+      lazy val nullnessAnalyzer = new AsmAnalyzer(method, ownerClassName, new NullnessAnalyzer(btypes, method))
+
+      // When running nullness optimizations the method may still have unreachable code. Analyzer
+      // frames of unreachable instructions are `null`.
+      def frameAt(insn: AbstractInsnNode): Option[Frame[NullnessValue]] = Option(nullnessAnalyzer.frameAt(insn))
+
+      def nullness(insn: AbstractInsnNode, slot: Int): Option[NullnessValue] = {
+        frameAt(insn).map(_.getValue(slot))
+      }
+
+      def isNull(insn: AbstractInsnNode, slot: Int) = nullness(insn, slot).contains(NullValue)
+
+      // cannot change instructions while iterating, it gets the analysis out of synch (indexed by instructions)
+      val toReplace = mutable.Map.empty[AbstractInsnNode, List[AbstractInsnNode]]
+
+      val it = method.instructions.iterator()
+      while (it.hasNext) it.next() match {
+        case vi: VarInsnNode if isNull(vi, vi.`var`) =>
+          if (vi.getOpcode == ALOAD)
+            toReplace(vi) = List(new InsnNode(ACONST_NULL))
+          else if (vi.getOpcode == ASTORE)
+            for (frame <- frameAt(vi) if frame.peekStack(0) == NullValue)
+              toReplace(vi) = List(getPop(1))
+
+        case ji: JumpInsnNode =>
+          val isIfNull = ji.getOpcode == IFNULL
+          val isIfNonNull = ji.getOpcode == IFNONNULL
+          if (isIfNull || isIfNonNull) for (frame <- frameAt(ji)) {
+            val nullness = frame.peekStack(0)
+            val taken = nullness == NullValue && isIfNull || nullness == NotNullValue && isIfNonNull
+            val avoided = nullness == NotNullValue && isIfNull || nullness == NullValue && isIfNonNull
+            if (taken || avoided) {
+              val jump = if (taken) List(new JumpInsnNode(GOTO, ji.label)) else Nil
+              toReplace(ji) = getPop(1) :: jump
+            }
+          } else {
+            val isIfEq = ji.getOpcode == IF_ACMPEQ
+            val isIfNe = ji.getOpcode == IF_ACMPNE
+            if (isIfEq || isIfNe) for (frame <- frameAt(ji)) {
+              val aNullness = frame.peekStack(1)
+              val bNullness = frame.peekStack(0)
+              val eq = aNullness == NullValue && bNullness == NullValue
+              val ne = aNullness == NullValue && bNullness == NotNullValue || aNullness == NotNullValue && bNullness == NullValue
+              val taken = isIfEq && eq || isIfNe && ne
+              val avoided = isIfEq && ne || isIfNe && eq
+              if (taken || avoided) {
+                val jump = if (taken) List(new JumpInsnNode(GOTO, ji.label)) else Nil
+                toReplace(ji) = getPop(1) :: getPop(1) :: jump
+              }
+            }
+          }
+
+        case ti: TypeInsnNode =>
+          if (ti.getOpcode == INSTANCEOF) for (frame <- frameAt(ti) if frame.peekStack(0) == NullValue) {
+            toReplace(ti) = List(getPop(1), new InsnNode(ICONST_0))
+          }
+
+        case mi: MethodInsnNode =>
+          if (isScalaUnbox(mi)) for (frame <- frameAt(mi) if frame.peekStack(0) == NullValue) {
+            toReplace(mi) = List(
+              getPop(1),
+              loadZeroForTypeSort(Type.getReturnType(mi.desc).getSort))
+          }
+
+        case _ =>
+      }
+
+      def removeFromCallGraph(insn: AbstractInsnNode): Unit = insn match {
+        case mi: MethodInsnNode => callGraph.removeCallsite(mi, method)
+        case _ =>
+      }
+
+      for ((oldOp, newOps) <- toReplace) {
+        for (newOp <- newOps) method.instructions.insertBefore(oldOp, newOp)
+        method.instructions.remove(oldOp)
+        removeFromCallGraph(oldOp)
+      }
+
+      toReplace.nonEmpty
+    }
+  }
 
-object LocalOptImpls {
   /**
    * Removes unreachable basic blocks.
    *
-   * TODO: rewrite, don't use computeMaxLocalsMaxStack (runs a ClassWriter) / Analyzer. Too slow.
-   *
    * @return A set containing eliminated instructions, and a set containing all live label nodes.
    */
-  def removeUnreachableCodeImpl(method: MethodNode, ownerClassName: InternalName): (Set[AbstractInsnNode], Set[LabelNode]) = {
-    // The data flow analysis requires the maxLocals / maxStack fields of the method to be computed.
-    computeMaxLocalsMaxStack(method)
-    val a = new Analyzer(new BasicInterpreter)
-    a.analyze(ownerClassName, method)
-    val frames = a.getFrames
+  def removeUnreachableCodeImpl(method: MethodNode, ownerClassName: InternalName): (Boolean, Set[LabelNode]) = {
+    val a = new AsmAnalyzer(method, ownerClassName)
+    val frames = a.analyzer.getFrames
 
-    val initialSize = method.instructions.size
     var i = 0
     var liveLabels = Set.empty[LabelNode]
-    var removedInstructions = Set.empty[AbstractInsnNode]
+    var changed = false
+    var maxLocals = parametersSize(method)
+    var maxStack = 0
     val itr = method.instructions.iterator()
     while (itr.hasNext) {
-      itr.next() match {
-        case l: LabelNode =>
-          if (frames(i) != null) liveLabels += l
+      val insn = itr.next()
+      val isLive = frames(i) != null
+      if (isLive) maxStack = math.max(maxStack, frames(i).getStackSize)
 
-        case ins =>
+      insn match {
+        case l: LabelNode =>
           // label nodes are not removed: they might be referenced for example in a LocalVariableNode
-          if (frames(i) == null || ins.getOpcode == Opcodes.NOP) {
+          if (isLive) liveLabels += l
+
+        case v: VarInsnNode if isLive =>
+          val longSize = if (isSize2LoadOrStore(v.getOpcode)) 1 else 0
+          maxLocals = math.max(maxLocals, v.`var` + longSize + 1) // + 1 because local numbers are 0-based
+
+        case i: IincInsnNode if isLive =>
+          maxLocals = math.max(maxLocals, i.`var` + 1)
+
+        case _ =>
+          if (!isLive || insn.getOpcode == NOP) {
             // Instruction iterators allow removing during iteration.
             // Removing is O(1): instructions are doubly linked list elements.
             itr.remove()
-            removedInstructions += ins
+            changed = true
+            insn match {
+              case invocation: MethodInsnNode => callGraph.removeCallsite(invocation, method)
+              case indy: InvokeDynamicInsnNode => callGraph.removeClosureInstantiation(indy, method)
+              case _ =>
+            }
           }
       }
       i += 1
     }
-    (removedInstructions, liveLabels)
+    method.maxLocals = maxLocals
+    method.maxStack  = maxStack
+    (changed, liveLabels)
   }
 
   /**
+   * Eliminate `CHECKCAST` instructions that are statically known to succeed. This is safe if the
+   * tested object is null: `null.asInstanceOf` always succeeds.
+   *
+   * The type of the tested object is determined using a NonLubbingTypeFlowAnalyzer. Note that this
+   * analysis collapses LUBs of non-equal reference types to Object for simplicity. Example:
+   * given `B <: A <: Object`, the cast in `(if (..) new B else new A).asInstanceOf[A]` would not
+   * be eliminated.
+   *
+   * Note: we cannot replace `INSTANCEOF` tests by only looking at the types, `null.isInstanceOf`
+   * always returns false, so we'd also need nullness information.
+   */
+  def eliminateRedundantCasts(method: MethodNode, owner: InternalName): Boolean = {
+    AsmAnalyzer.sizeOKForBasicValue(method) && {
+      def isSubType(aRefDesc: String, bClass: InternalName): Boolean = aRefDesc == bClass || bClass == ObjectRef.internalName || {
+        (bTypeForDescriptorOrInternalNameFromClassfile(aRefDesc) conformsTo classBTypeFromParsedClassfile(bClass)).getOrElse(false)
+      }
+
+      lazy val typeAnalyzer = new NonLubbingTypeFlowAnalyzer(method, owner)
+
+      // cannot remove instructions while iterating, it gets the analysis out of synch (indexed by instructions)
+      val toRemove = mutable.Set.empty[TypeInsnNode]
+
+      val it = method.instructions.iterator()
+      while (it.hasNext) it.next() match {
+        case ti: TypeInsnNode if ti.getOpcode == CHECKCAST =>
+          val frame = typeAnalyzer.frameAt(ti)
+          val valueTp = frame.getValue(frame.stackTop)
+          if (valueTp.isReference && isSubType(valueTp.getType.getDescriptor, ti.desc)) {
+            toRemove += ti
+          }
+
+        case _ =>
+      }
+
+      toRemove foreach method.instructions.remove
+      toRemove.nonEmpty
+    }
+  }
+}
+
+object LocalOptImpls {
+  /**
    * Remove exception handlers that cover empty code blocks. A block is considered empty if it
    * consist only of labels, frames, line numbers, nops and gotos.
    *
@@ -235,16 +588,16 @@ object LocalOptImpls {
   def removeEmptyExceptionHandlers(method: MethodNode): Set[TryCatchBlockNode] = {
     /** True if there exists code between start and end. */
     def containsExecutableCode(start: AbstractInsnNode, end: LabelNode): Boolean = {
-      start != end && ((start.getOpcode : @switch) match {
+      start != end && ((start.getOpcode: @switch) match {
         // FrameNode, LabelNode and LineNumberNode have opcode == -1.
-        case -1 | Opcodes.GOTO => containsExecutableCode(start.getNext, end)
+        case -1 | GOTO => containsExecutableCode(start.getNext, end)
         case _ => true
       })
     }
 
     var removedHandlers = Set.empty[TryCatchBlockNode]
     val handlersIter = method.tryCatchBlocks.iterator()
-    while(handlersIter.hasNext) {
+    while (handlersIter.hasNext) {
       val handler = handlersIter.next()
       if (!containsExecutableCode(handler.start, handler.end)) {
         removedHandlers += handler
@@ -263,9 +616,10 @@ object LocalOptImpls {
    * same type or name.
    */
   def removeUnusedLocalVariableNodes(method: MethodNode)(firstLocalIndex: Int = parametersSize(method), renumber: Int => Int = identity): Boolean = {
-    def variableIsUsed(start: AbstractInsnNode, end: LabelNode, varIndex: Int): Boolean = {
+    @tailrec def variableIsUsed(start: AbstractInsnNode, end: LabelNode, varIndex: Int): Boolean = { // scans from `start` up to (excluding) `end` for an instruction that refers to slot `varIndex`
       start != end && (start match {
         case v: VarInsnNode if v.`var` == varIndex => true
+        case i: IincInsnNode if i.`var` == varIndex => true // an IINC on the slot counts as a use, just like a load or store
         case _ => variableIsUsed(start.getNext, end, varIndex)
       })
     }
@@ -285,17 +639,6 @@ object LocalOptImpls {
   }
 
   /**
-   * The number of local variable slots used for parameters and for the `this` reference.
-   */
-  private def parametersSize(method: MethodNode): Int = {
-    // Double / long fields occupy two slots, so we sum up the sizes. Since getSize returns 0 for
-    // void, we have to add `max 1`.
-    val paramsSize = scala.tools.asm.Type.getArgumentTypes(method.desc).iterator.map(_.getSize max 1).sum
-    val thisSize   = if ((method.access & Opcodes.ACC_STATIC) == 0) 1 else 0
-    paramsSize + thisSize
-  }
-
-  /**
    * Compact the local variable slots used in the method's implementation. This prevents having
    * unused slots for example after eliminating unreachable code.
    *
@@ -310,12 +653,9 @@ object LocalOptImpls {
     val renumber = collection.mutable.ArrayBuffer.empty[Int]
 
     // Add the index of the local variable used by `varIns` to the `renumber` array.
-    def addVar(varIns: VarInsnNode): Unit = {
-      val index = varIns.`var`
-      val isWide = (varIns.getOpcode: @switch) match {
-        case Opcodes.LLOAD | Opcodes.DLOAD | Opcodes.LSTORE | Opcodes.DSTORE => true
-        case _ => false
-      }
+    def addVar(varIns: AbstractInsnNode, slot: Int): Unit = {
+      val index = slot
+      val isWide = isSize2LoadOrStore(varIns.getOpcode)
 
       // Ensure the length of `renumber`. Unused variable indices are mapped to -1.
       val minLength = if (isWide) index + 2 else index + 1
@@ -332,7 +672,7 @@ object LocalOptImpls {
     val firstLocalIndex = parametersSize(method)
     for (i <- 0 until firstLocalIndex) renumber += i // parameters and `this` are always used.
     method.instructions.iterator().asScala foreach {
-      case VarInstruction(varIns) => addVar(varIns)
+      case VarInstruction(varIns, slot) => addVar(varIns, slot)
       case _ =>
     }
 
@@ -353,10 +693,12 @@ object LocalOptImpls {
       // update variable instructions according to the renumber table
       method.maxLocals = nextIndex
       method.instructions.iterator().asScala.foreach {
-        case VarInstruction(varIns) =>
-          val oldIndex = varIns.`var`
-          if (oldIndex >= firstLocalIndex && renumber(oldIndex) != oldIndex)
-            varIns.`var` = renumber(varIns.`var`)
+        case VarInstruction(varIns, slot) =>
+          val oldIndex = slot
+          if (oldIndex >= firstLocalIndex && renumber(oldIndex) != oldIndex) varIns match {
+            case vi: VarInsnNode => vi.`var` = renumber(slot)
+            case ii: IincInsnNode => ii.`var` = renumber(slot)
+          }
         case _ =>
       }
       true
@@ -431,154 +773,181 @@ object LocalOptImpls {
     // A set of all exception handlers that guard the current instruction, required for simplifyGotoReturn
     var activeHandlers = Set.empty[TryCatchBlockNode]
 
-    // Instructions that need to be removed. simplifyBranchOverGoto returns an instruction to be
-    // removed. It cannot remove it itself because the instruction may be the successor of the current
-    // instruction of the iterator, which is not supported in ASM.
-    var instructionsToRemove = Set.empty[AbstractInsnNode]
+    val jumpInsns = mutable.LinkedHashMap.empty[JumpInsnNode, Boolean]
 
-    val iterator = method.instructions.iterator()
-    while (iterator.hasNext) {
-      val instruction = iterator.next()
+    for (insn <- method.instructions.iterator().asScala) insn match {
+      case l: LabelNode =>
+        activeHandlers ++= allHandlers.filter(_.start == l)
+        activeHandlers = activeHandlers.filter(_.end != l)
 
-      instruction match {
-        case l: LabelNode =>
-          activeHandlers ++= allHandlers.filter(_.start == l)
-          activeHandlers = activeHandlers.filter(_.end != l)
-        case _ =>
+      case ji: JumpInsnNode =>
+        jumpInsns(ji) = activeHandlers.nonEmpty
+
+      case _ =>
+    }
+
+    var _jumpTargets: Set[AbstractInsnNode] = null // cache for `jumpTargets`; null means it has to be (re)computed
+    def jumpTargets = { // the labels targeted by the jumps currently recorded in `jumpInsns`
+      if (_jumpTargets == null) {
+        _jumpTargets = jumpInsns.keysIterator.map(_.label).toSet // rebuilt from scratch, there is no incremental update
       }
+      _jumpTargets
+    }
 
-      if (instructionsToRemove(instruction)) {
-        iterator.remove()
-        instructionsToRemove -= instruction
-      } else if (isJumpNonJsr(instruction)) { // fast path - all of the below only treat jumps
-        var jumpRemoved = simplifyThenElseSameTarget(method, instruction)
+    // Forgets `jump` in the bookkeeping map and invalidates the cached set of jump targets, which
+    // is derived from the map's keys.
+    def removeJumpFromMap(jump: JumpInsnNode) = {
+      jumpInsns -= jump
+      _jumpTargets = null
+    }
 
-        if (!jumpRemoved) {
-          changed = collapseJumpChains(instruction) || changed
-          jumpRemoved = removeJumpToSuccessor(method, instruction)
+    def replaceJumpByPop(jump: JumpInsnNode) = { // eliminates `jump` from the method and from the bookkeeping map
+      removeJumpAndAdjustStack(method, jump) // helper defined elsewhere; per the doc comments of the callers it leaves 0, 1 or 2 POPs in place of the jump
+      removeJumpFromMap(jump) // also invalidates the cached jump targets
+    }
 
-          if (!jumpRemoved) {
-            val staleGoto = simplifyBranchOverGoto(method, instruction)
-            instructionsToRemove ++= staleGoto
-            changed ||= staleGoto.nonEmpty
-            changed = simplifyGotoReturn(method, instruction, inTryBlock = activeHandlers.nonEmpty) || changed
-          }
+    /**
+     * Removes a conditional jump if it is followed by a GOTO to the same destination.
+     *
+     * {{{
+     *      CondJump l;  [nops];  GOTO l;  [...]
+     *   => POP*;        [nops];  GOTO l;  [...]
+     * }}}
+     *
+     * Introduces 1 or 2 POP instructions, depending on the number of values consumed by the CondJump.
+     * The eliminated jump is also dropped from `jumpInsns`.
+     *
+     * @param insn the instruction to inspect, only conditional jumps are rewritten
+     * @return true if the conditional jump was eliminated
+     */
+    def simplifyThenElseSameTarget(insn: AbstractInsnNode): Boolean = insn match {
+      case ConditionalJump(jump) =>
+        nextExecutableInstruction(insn) match {
+          case Some(Goto(elseJump)) if sameTargetExecutableInstruction(jump, elseJump) =>
+            replaceJumpByPop(jump)
+            true
+
+          case _ => false
         }
-        changed ||= jumpRemoved
-      }
+
+      case _ => false
     }
-    assert(instructionsToRemove.isEmpty, "some optimization required removing a previously traversed instruction. add `instructionsToRemove.foreach(method.instructions.remove)`")
-    changed
-  }
 
-  /**
-   * Removes a conditional jump if it is followed by a GOTO to the same destination.
-   *
-   *      CondJump l;  [nops];  GOTO l;  [...]
-   *      POP*;        [nops];  GOTO l;  [...]
-   *
-   * Introduces 1 or 2 POP instructions, depending on the number of values consumed by the CondJump.
-   */
-  private def simplifyThenElseSameTarget(method: MethodNode, instruction: AbstractInsnNode): Boolean = instruction match {
-    case ConditionalJump(jump) =>
-      nextExecutableInstruction(instruction) match {
-        case Some(Goto(elseJump)) if sameTargetExecutableInstruction(jump, elseJump) =>
-          removeJumpAndAdjustStack(method, jump)
+    /**
+     * Replace jumps to a sequence of GOTO instructions by a jump to the final destination.
+     *
+     * {{{
+     *      Jump l;  [any ops];  l: GOTO m;  [any ops];  m: GOTO n;  [any ops];   n: NotGOTO; [...]
+     *   => Jump n;  [rest unchanged]
+     * }}}
+     *
+     * If there's a loop of GOTOs, the initial jump is replaced by one of the labels in the loop.
+     *
+     * The jump is retargeted in place; no instruction is added or removed. Because the set of
+     * targeted labels may change, the cached jump targets are invalidated.
+     *
+     * @param insn the instruction to inspect, only non-JSR jumps are rewritten
+     * @return true if the label of the jump was changed
+     */
+    def collapseJumpChains(insn: AbstractInsnNode): Boolean = insn match {
+      case JumpNonJsr(jump) =>
+        val target = finalJumpTarget(jump)
+        if (jump.label == target) false else {
+          jump.label = target
+          _jumpTargets = null
           true
+        }
 
-        case _ => false
-      }
-    case _ => false
-  }
+      case _ => false
+    }
 
-  /**
-   * Replace jumps to a sequence of GOTO instructions by a jump to the final destination.
-   *
-   *      Jump l;  [any ops];  l: GOTO m;  [any ops];  m: GOTO n;  [any ops];   n: NotGOTO; [...]
-   *   => Jump n;  [rest unchanged]
-   *
-   * If there's a loop of GOTOs, the initial jump is replaced by one of the labels in the loop.
-   */
-  private def collapseJumpChains(instruction: AbstractInsnNode): Boolean = instruction match {
-    case JumpNonJsr(jump) =>
-      val target = finalJumpTarget(jump)
-      if (jump.label == target) false else {
-        jump.label = target
+    /**
+     * Eliminates unnecessary jump instructions, i.e., jumps whose target label is reached anyway
+     * because only non-executable instructions lie between the jump and the label.
+     *
+     * {{{
+     *      Jump l;  [nops];  l: [...]
+     *   => POP*;    [nops];  l: [...]
+     * }}}
+     *
+     * Introduces 0, 1 or 2 POP instructions, depending on the number of values consumed by the Jump.
+     * The eliminated jump is also dropped from `jumpInsns`.
+     *
+     * @param insn the instruction to inspect, only non-JSR jumps are rewritten
+     * @return true if the jump was eliminated
+     */
+    def removeJumpToSuccessor(insn: AbstractInsnNode): Boolean = insn match {
+      case JumpNonJsr(jump) if nextExecutableInstruction(jump, alsoKeep = Set(jump.label)) contains jump.label =>
+        replaceJumpByPop(jump)
         true
-      }
 
-    case _ => false
-  }
+      case _ => false
+    }
 
-  /**
-   * Eliminates unnecessary jump instructions
-   *
-   *      Jump l;  [nops];  l: [...]
-   *   => POP*;    [nops];  l: [...]
-   *
-   * Introduces 0, 1 or 2 POP instructions, depending on the number of values consumed by the Jump.
-   */
-  private def removeJumpToSuccessor(method: MethodNode, instruction: AbstractInsnNode) = instruction match {
-    case JumpNonJsr(jump) if nextExecutableInstruction(jump, alsoKeep = Set(jump.label)) == Some(jump.label) =>
-      removeJumpAndAdjustStack(method, jump)
-      true
-    case _ => false
-  }
+    /**
+     * Rewrites a conditional branch whose "else" part is nothing but a GOTO: the condition is
+     * negated so that it jumps to the destination of the GOTO directly, and the GOTO is eliminated.
+     *
+     * {{{
+     *      CondJump l;         [nops, no jump targets];  GOTO m;  [nops];  l: [...]
+     *   => NegatedCondJump m;  [nops, no jump targets];           [nops];  l: [...]
+     * }}}
+     *
+     * The first [nops] section must not contain a jump target. Otherwise some other jump could
+     * reach the GOTO, and eliminating it would change behavior.
+     *
+     * @param insn       the instruction to inspect, only conditional jumps are rewritten
+     * @param inTryBlock the flag recorded in `jumpInsns` for the negated jump that replaces `insn`
+     * @return true if the rewrite was performed
+     */
+    def simplifyBranchOverGoto(insn: AbstractInsnNode, inTryBlock: Boolean): Boolean = insn match {
+      case ConditionalJump(condJump) =>
+        val thenLabel = condJump.label
+        // jump targets are kept by the search, so a GOTO is only found if no jump target precedes it
+        nextExecutableInstruction(condJump, alsoKeep = jumpTargets) match {
+          case Some(Goto(elseGoto)) if nextExecutableInstruction(elseGoto, alsoKeep = Set(thenLabel)) contains thenLabel =>
+            val negatedJump = new JumpInsnNode(negateJumpOpcode(condJump.getOpcode), elseGoto.label)
+            method.instructions.set(condJump, negatedJump)
+            // keep the bookkeeping in sync: the old jump is gone, the negated one takes its place
+            removeJumpFromMap(condJump)
+            jumpInsns(negatedJump) = inTryBlock
+            replaceJumpByPop(elseGoto)
+            true
+
+          case _ => false
+        }
+
+      case _ => false
+    }
 
-  /**
-   * If the "else" part of a conditional branch is a simple GOTO, negates the conditional branch
-   * and eliminates the GOTO.
-   *
-   *      CondJump l;         [nops, no labels];  GOTO m;  [nops];  l: [...]
-   *   => NegatedCondJump m;  [nops, no labels];           [nops];  l: [...]
-   *
-   * Note that no label definitions are allowed in the first [nops] section. Otherwise, there could
-   * be some other jump to the GOTO, and eliminating it would change behavior.
-   *
-   * For technical reasons, we cannot remove the GOTO here (*).Instead this method returns an Option
-   * containing the GOTO that needs to be eliminated.
-   *
-   * (*) The ASM instruction iterator (used in the caller [[simplifyJumps]]) has an undefined
-   *     behavior if the successor of the current instruction is removed, which may be the case here
-   */
-  private def simplifyBranchOverGoto(method: MethodNode, instruction: AbstractInsnNode): Option[JumpInsnNode] = instruction match {
-    case ConditionalJump(jump) =>
-      // don't skip over labels, see doc comment
-      nextExecutableInstruction(jump, alsoKeep = _.isInstanceOf[LabelNode]) match {
-        case Some(Goto(goto)) =>
-          if (nextExecutableInstruction(goto, alsoKeep = Set(jump.label)) == Some(jump.label)) {
-            val newJump = new JumpInsnNode(negateJumpOpcode(jump.getOpcode), goto.label)
-            method.instructions.set(jump, newJump)
-            Some(goto)
-          } else None
-
-        case _ => None
-      }
-    case _ => None
-  }
+    /**
+     * Replaces a GOTO whose destination is an xRETURN or ATHROW by a copy of that instruction.
+     *
+     * {{{
+     *      GOTO l;            [any ops];  l: xRETURN/ATHROW
+     *   => xRETURN/ATHROW;    [any ops];  l: xRETURN/ATHROW
+     * }}}
+     *
+     * Nothing is rewritten if the GOTO instruction is part of a try block, because the rewrite
+     * might change the behavior. For xRETURN, the reason is that return instructions may throw
+     * an IllegalMonitorStateException, as described here:
+     *   http://docs.oracle.com/javase/specs/jvms/se8/html/jvms-6.html#jvms-6.5.return
+     *
+     * @param instruction the instruction to inspect, only GOTOs are rewritten
+     * @param inTryBlock  whether `instruction` is guarded by an exception handler
+     * @return true if the GOTO was replaced
+     */
+    def simplifyGotoReturn(instruction: AbstractInsnNode, inTryBlock: Boolean): Boolean = {
+      if (inTryBlock) false
+      else instruction match {
+        case Goto(goto) =>
+          nextExecutableInstruction(goto.label) match {
+            case Some(exit) if isReturn(exit) || exit.getOpcode == ATHROW =>
+              method.instructions.set(goto, exit.clone(null))
+              // the GOTO is no longer part of the method, forget it
+              removeJumpFromMap(goto)
+              true
+
+            case _ => false
+          }
+
+        case _ => false
+      }
+    }
 
-  /**
-   * Inlines xRETURN and ATHROW
-   *
-   *      GOTO l;            [any ops];  l: xRETURN/ATHROW
-   *   => xRETURN/ATHROW;    [any ops];  l: xRETURN/ATHROW
-   *
-   * inlining is only done if the GOTO instruction is not part of a try block, otherwise the
-   * rewrite might change the behavior. For xRETURN, the reason is that return instructions may throw
-   * an IllegalMonitorStateException, as described here:
-   *   http://docs.oracle.com/javase/specs/jvms/se8/html/jvms-6.html#jvms-6.5.return
-   */
-  private def simplifyGotoReturn(method: MethodNode, instruction: AbstractInsnNode, inTryBlock: Boolean): Boolean = !inTryBlock && (instruction match {
-    case Goto(jump) =>
-      nextExecutableInstruction(jump.label) match {
-        case Some(target) =>
-          if (isReturn(target) || target.getOpcode == Opcodes.ATHROW) {
-            method.instructions.set(jump, target.clone(null))
-            true
-          } else false
+    def run(): Boolean = { // one pass of all jump simplifications over the recorded jumps; returns true if anything was rewritten
+      var changed = false
+
+      // Iterate over a snapshot (`.toList`) because the rewrites below modify the map. The `contains`
+      // guard skips jumps that were removed from the map by a rewrite earlier in this pass.
+      for ((jumpInsn, inTryBlock) <- jumpInsns.toList if jumpInsns.contains(jumpInsn) && isJumpNonJsr(jumpInsn)) {
+        var jumpRemoved = simplifyThenElseSameTarget(jumpInsn)
+
+        if (!jumpRemoved) {
+          changed = collapseJumpChains(jumpInsn) || changed
+          jumpRemoved = removeJumpToSuccessor(jumpInsn)
+
+          if (!jumpRemoved) {
+            changed = simplifyBranchOverGoto(jumpInsn, inTryBlock) || changed
+            changed = simplifyGotoReturn(jumpInsn, inTryBlock) || changed
+          }
+        }
 
-        case _ => false
+        changed ||= jumpRemoved
       }
-    case _ => false
-  })
+
+      if (changed) run() // run further passes until one of them changes nothing
+      changed
+    }
+
+    run()
+  }
 }