diff options
Diffstat (limited to 'src')
24 files changed, 1969 insertions, 727 deletions
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/AsmUtils.scala b/src/compiler/scala/tools/nsc/backend/jvm/AsmUtils.scala index cd7e0b83e8..18a495e5fd 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/AsmUtils.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/AsmUtils.scala @@ -55,6 +55,13 @@ object AsmUtils { node } + def readClass(filename: String): ClassNode = { + val f = new java.io.RandomAccessFile(filename, "r") + val b = new Array[Byte](f.length.toInt) + f.read(b) + readClass(b) + } + /** * Returns a human-readable representation of the cnode ClassNode. */ diff --git a/src/compiler/scala/tools/nsc/backend/jvm/BCodeAsmCommon.scala b/src/compiler/scala/tools/nsc/backend/jvm/BCodeAsmCommon.scala index 93f5159f89..42738d3e1c 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/BCodeAsmCommon.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/BCodeAsmCommon.scala @@ -390,6 +390,17 @@ final class BCodeAsmCommon[G <: Global](val global: G) { val isEffectivelyFinal = classSym.isEffectivelyFinal + val sam = { + if (classSym.isImplClass || classSym.isEffectivelyFinal) None + else { + // Phase travel necessary. For example, nullary methods (getter of an abstract val) get an + // empty parameter list in later phases and would therefore be picked as SAM. + val samSym = exitingPickler(definitions.findSam(classSym.tpe)) + if (samSym == NoSymbol) None + else Some(samSym.javaSimpleName.toString + methodSymToDescriptor(samSym)) + } + } + var warning = Option.empty[ClassSymbolInfoFailureSI9111] // Primitive methods cannot be inlined, so there's no point in building a MethodInlineInfo. 
Also, some @@ -447,7 +458,7 @@ final class BCodeAsmCommon[G <: Global](val global: G) { } }).toMap - InlineInfo(traitSelfType, isEffectivelyFinal, methodInlineInfos, warning) + InlineInfo(traitSelfType, isEffectivelyFinal, sam, methodInlineInfos, warning) } } diff --git a/src/compiler/scala/tools/nsc/backend/jvm/BCodeSkelBuilder.scala b/src/compiler/scala/tools/nsc/backend/jvm/BCodeSkelBuilder.scala index a9b6a312e9..075a44ca22 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/BCodeSkelBuilder.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/BCodeSkelBuilder.scala @@ -9,7 +9,6 @@ package backend package jvm import scala.collection.{ mutable, immutable } -import scala.tools.nsc.backend.jvm.opt.ByteCodeRepository import scala.tools.nsc.symtab._ import scala.tools.asm @@ -140,11 +139,6 @@ abstract class BCodeSkelBuilder extends BCodeHelpers { if (AsmUtils.traceClassEnabled && cnode.name.contains(AsmUtils.traceClassPattern)) AsmUtils.traceClass(cnode) - if (settings.YoptAddToBytecodeRepository) { - // The inliner needs to find all classes in the code repo, also those being compiled - byteCodeRepository.add(cnode, ByteCodeRepository.CompilationUnit) - } - assert(cd.symbol == claszSymbol, "Someone messed up BCodePhase.claszSymbol during genPlainClass().") } // end of method genPlainClass() diff --git a/src/compiler/scala/tools/nsc/backend/jvm/BTypes.scala b/src/compiler/scala/tools/nsc/backend/jvm/BTypes.scala index 0c26e01322..aff2d2d8c9 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/BTypes.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/BTypes.scala @@ -11,9 +11,10 @@ import scala.collection.concurrent.TrieMap import scala.reflect.internal.util.Position import scala.tools.asm import asm.Opcodes -import scala.tools.asm.tree.{MethodNode, MethodInsnNode, InnerClassNode, ClassNode} +import scala.tools.asm.tree._ import scala.tools.nsc.backend.jvm.BTypes.{InlineInfo, MethodInlineInfo} import scala.tools.nsc.backend.jvm.BackendReporting._ +import 
scala.tools.nsc.backend.jvm.analysis.Analyzers import scala.tools.nsc.backend.jvm.opt._ import scala.collection.convert.decorateAsScala._ import scala.tools.nsc.settings.ScalaSettings @@ -38,16 +39,20 @@ abstract class BTypes { /** * Tools for parsing classfiles, used by the inliner. */ - val byteCodeRepository: ByteCodeRepository + val byteCodeRepository: ByteCodeRepository[this.type] val localOpt: LocalOpt[this.type] val inliner: Inliner[this.type] + val inlinerHeuristics: InlinerHeuristics[this.type] + val closureOptimizer: ClosureOptimizer[this.type] val callGraph: CallGraph[this.type] + val analyzers: Analyzers[this.type] + val backendReporting: BackendReporting // Allows to define per-run caches here and in the CallGraph component, which don't have a global @@ -56,7 +61,6 @@ abstract class BTypes { // Allows access to the compiler settings for backend components that don't have a global in scope def compilerSettings: ScalaSettings - /** * A map from internal names to ClassBTypes. Every ClassBType is added to this map on its * construction. @@ -95,6 +99,13 @@ abstract class BTypes { val unreachableCodeEliminated: collection.mutable.Set[MethodNode] = recordPerRunCache(collection.mutable.Set.empty) /** + * Cache of methods which have correct `maxLocals` / `maxStack` values assigned. This allows + * invoking `computeMaxLocalsMaxStack` whenever running an analyzer but performing the actual + * computation only when necessary. + */ + val maxLocalsMaxStackComputed: collection.mutable.Set[MethodNode] = recordPerRunCache(collection.mutable.Set.empty) + + /** * Obtain the BType for a type descriptor or internal name. For class descriptors, the ClassBType * is constructed by parsing the corresponding classfile. 
* @@ -234,6 +245,7 @@ abstract class BTypes { InlineInfo( traitImplClassSelfType = None, isEffectivelyFinal = BytecodeUtils.isFinalClass(classNode), + sam = inlinerHeuristics.javaSam(classNode.name), methodInfos = methodInfos, warning) } @@ -554,6 +566,8 @@ abstract class BTypes { * Terminology * ----------- * + * Diagram here: https://blogs.oracle.com/darcy/entry/nested_inner_member_and_top + * * - Nested class (JLS 8): class whose declaration occurs within the body of another class * * - Top-level class (JLS 8): non-nested class @@ -1104,6 +1118,8 @@ object BTypes { * Metadata about a ClassBType, used by the inliner. * * More information may be added in the future to enable more elaborate inlinine heuristics. + * Note that this class should contain information that can only be obtained from the ClassSymbol. + * Information that can be computed from the ClassNode should be added to the call graph instead. * * @param traitImplClassSelfType `Some(tp)` if this InlineInfo describes a trait, and the `self` * parameter type of the methods in the implementation class is not @@ -1122,6 +1138,8 @@ object BTypes { * @param isEffectivelyFinal True if the class cannot have subclasses: final classes, module * classes, trait impl classes. * + * @param sam If this class is a SAM type, the SAM's "$name$descriptor". + * * @param methodInfos The [[MethodInlineInfo]]s for the methods declared in this class. * The map is indexed by the string s"$name$descriptor" (to * disambiguate overloads). @@ -1132,10 +1150,11 @@ object BTypes { */ final case class InlineInfo(traitImplClassSelfType: Option[InternalName], isEffectivelyFinal: Boolean, + sam: Option[String], methodInfos: Map[String, MethodInlineInfo], warning: Option[ClassInlineInfoWarning]) - val EmptyInlineInfo = InlineInfo(None, false, Map.empty, None) + val EmptyInlineInfo = InlineInfo(None, false, None, Map.empty, None) /** * Metadata about a method, used by the inliner. 
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/BTypesFromSymbols.scala b/src/compiler/scala/tools/nsc/backend/jvm/BTypesFromSymbols.scala index 45d9cc3ff3..8cccc50c69 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/BTypesFromSymbols.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/BTypesFromSymbols.scala @@ -7,8 +7,9 @@ package scala.tools.nsc package backend.jvm import scala.tools.asm +import scala.tools.nsc.backend.jvm.analysis.Analyzers import scala.tools.nsc.backend.jvm.opt._ -import scala.tools.nsc.backend.jvm.BTypes.{InlineInfo, MethodInlineInfo, InternalName} +import scala.tools.nsc.backend.jvm.BTypes._ import BackendReporting._ import scala.tools.nsc.settings.ScalaSettings @@ -36,16 +37,20 @@ class BTypesFromSymbols[G <: Global](val global: G) extends BTypes { val coreBTypes = new CoreBTypesProxy[this.type](this) import coreBTypes._ - val byteCodeRepository = new ByteCodeRepository(global.classPath, javaDefinedClasses, recordPerRunCache(collection.concurrent.TrieMap.empty)) + val byteCodeRepository: ByteCodeRepository[this.type] = new ByteCodeRepository(global.classPath, this) val localOpt: LocalOpt[this.type] = new LocalOpt(this) val inliner: Inliner[this.type] = new Inliner(this) + val inlinerHeuristics: InlinerHeuristics[this.type] = new InlinerHeuristics(this) + val closureOptimizer: ClosureOptimizer[this.type] = new ClosureOptimizer(this) val callGraph: CallGraph[this.type] = new CallGraph(this) + val analyzers: Analyzers[this.type] = new Analyzers(this) + val backendReporting: BackendReporting = new BackendReportingImpl(global) final def initializeCoreBTypes(): Unit = { @@ -444,7 +449,7 @@ class BTypesFromSymbols[G <: Global](val global: G) extends BTypes { case Right(classNode) => inlineInfoFromClassfile(classNode) case Left(missingClass) => - InlineInfo(None, false, Map.empty, Some(ClassNotFoundWhenBuildingInlineInfoFromSymbol(missingClass))) + EmptyInlineInfo.copy(warning = 
Some(ClassNotFoundWhenBuildingInlineInfoFromSymbol(missingClass))) } } } @@ -467,7 +472,7 @@ class BTypesFromSymbols[G <: Global](val global: G) extends BTypes { flags = asm.Opcodes.ACC_SUPER | asm.Opcodes.ACC_PUBLIC | asm.Opcodes.ACC_FINAL, nestedClasses = nested, nestedInfo = None, - InlineInfo(None, true, Map.empty, None))) // no InlineInfo needed, scala never invokes methods on the mirror class + inlineInfo = EmptyInlineInfo.copy(isEffectivelyFinal = true))) // no method inline infos needed, scala never invokes methods on the mirror class c }) } diff --git a/src/compiler/scala/tools/nsc/backend/jvm/BackendReporting.scala b/src/compiler/scala/tools/nsc/backend/jvm/BackendReporting.scala index b41d0de92f..005d01f187 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/BackendReporting.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/BackendReporting.scala @@ -1,7 +1,7 @@ package scala.tools.nsc package backend.jvm -import scala.tools.asm.tree.{InvokeDynamicInsnNode, AbstractInsnNode, MethodNode} +import scala.tools.asm.tree.{AbstractInsnNode, MethodNode} import scala.tools.nsc.backend.jvm.BTypes.InternalName import scala.reflect.internal.util.Position import scala.tools.nsc.settings.ScalaSettings @@ -228,7 +228,7 @@ object BackendReporting { def emitWarning(settings: ScalaSettings): Boolean = this match { case _: IllegalAccessInstruction | _: MethodWithHandlerCalledOnNonEmptyStack | _: SynchronizedMethod | _: StrictfpMismatch | _: ResultingMethodTooLarge => - settings.YoptWarningEmitAtInlineFailed + settings.YoptWarnings.contains(settings.YoptWarningsChoices.anyInlineFailed) case IllegalAccessCheckFailed(_, _, _, _, _, cause) => cause.emitWarning(settings) @@ -246,9 +246,11 @@ object BackendReporting { case class ResultingMethodTooLarge(calleeDeclarationClass: InternalName, name: String, descriptor: String, callsiteClass: InternalName, callsiteName: String, callsiteDesc: String) extends CannotInlineWarning + // TODO: this should be a subtype of 
CannotInlineWarning + // but at the place where it's created (in findIllegalAccess) we don't have the necessary data (calleeName, calleeDescriptor). case object UnknownInvokeDynamicInstruction extends OptimizerWarning { override def toString = "The callee contains an InvokeDynamic instruction with an unknown bootstrap method (not a LambdaMetaFactory)." - def emitWarning(settings: ScalaSettings): Boolean = settings.YoptWarningEmitAtInlineFailed + def emitWarning(settings: ScalaSettings): Boolean = settings.YoptWarnings.contains(settings.YoptWarningsChoices.anyInlineFailed) } /** @@ -260,7 +262,7 @@ object BackendReporting { override def emitWarning(settings: ScalaSettings): Boolean = this match { case RewriteClosureAccessCheckFailed(_, cause) => cause.emitWarning(settings) - case RewriteClosureIllegalAccess(_, _) => settings.YoptWarningEmitAtInlineFailed + case RewriteClosureIllegalAccess(_, _) => settings.YoptWarnings.contains(settings.YoptWarningsChoices.anyInlineFailed) } override def toString: String = this match { diff --git a/src/compiler/scala/tools/nsc/backend/jvm/BackendStats.scala b/src/compiler/scala/tools/nsc/backend/jvm/BackendStats.scala index 03306f30aa..8d0547b607 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/BackendStats.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/BackendStats.scala @@ -8,6 +8,7 @@ package backend.jvm import scala.reflect.internal.util.Statistics +// Enable with `-Ystatistics:jvm` object BackendStats { import Statistics.{newTimer, newSubTimer} val bcodeTimer = newTimer("time in backend", "jvm") diff --git a/src/compiler/scala/tools/nsc/backend/jvm/GenBCode.scala b/src/compiler/scala/tools/nsc/backend/jvm/GenBCode.scala index 00b4b8b667..e1a724f1cb 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/GenBCode.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/GenBCode.scala @@ -14,6 +14,7 @@ import scala.reflect.internal.util.Statistics import scala.tools.asm import scala.tools.asm.tree.ClassNode +import 
scala.tools.nsc.backend.jvm.opt.ByteCodeRepository /* * Prepare in-memory representations of classfiles using the ASM Tree API, and serialize them to disk. @@ -186,7 +187,7 @@ abstract class GenBCode extends BCodeSyncAndTry { // -------------- "plain" class -------------- val pcb = new PlainClassBuilder(cunit) pcb.genPlainClass(cd) - val outF = if (needsOutFolder) getOutFolder(claszSymbol, pcb.thisName, cunit) else null; + val outF = if (needsOutFolder) getOutFolder(claszSymbol, pcb.thisName, cunit) else null val plainC = pcb.cnode // -------------- bean info class, if needed -------------- @@ -221,12 +222,18 @@ abstract class GenBCode extends BCodeSyncAndTry { class Worker2 { def runGlobalOptimizations(): Unit = { import scala.collection.convert.decorateAsScala._ - if (settings.YoptBuildCallGraph) { - q2.asScala foreach { - case Item2(_, _, plain, _, _) => - // skip mirror / bean: wd don't inline into tem, and they are not used in the plain class - if (plain != null) callGraph.addClass(plain) - } + + // add classes to the bytecode repo before building the call graph: the latter needs to + // look up classes and methods in the code repo. 
+ if (settings.YoptAddToBytecodeRepository) q2.asScala foreach { + case Item2(_, mirror, plain, bean, _) => + if (mirror != null) byteCodeRepository.add(mirror, ByteCodeRepository.CompilationUnit) + if (plain != null) byteCodeRepository.add(plain, ByteCodeRepository.CompilationUnit) + if (bean != null) byteCodeRepository.add(bean, ByteCodeRepository.CompilationUnit) + } + if (settings.YoptBuildCallGraph) q2.asScala foreach { item => + // skip call graph for mirror / bean: wd don't inline into tem, and they are not used in the plain class + if (item.plain != null) callGraph.addClass(item.plain) } if (settings.YoptInlinerEnabled) bTypes.inliner.runInliner() diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala index 7bbe1e2a49..9e5fbfcc0e 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala @@ -3,17 +3,22 @@ package backend.jvm package analysis import scala.annotation.switch -import scala.collection.{mutable, immutable} +import scala.collection.mutable import scala.tools.asm.Opcodes import scala.tools.asm.tree._ import scala.tools.asm.tree.analysis.{Analyzer, Value, Frame, Interpreter} import opt.BytecodeUtils._ +import AliasSet.SmallBitSet -object AliasingFrame { - private var _idCounter: Long = 0l - private def nextId = { _idCounter += 1; _idCounter } -} - +/** + * A subclass of Frame that tracks aliasing of values stored in local variables and on the stack. + * + * Note: an analysis tracking aliases is roughly 5x slower than a usual analysis (assuming a simple + * value domain with a fast merge function). For example, nullness analysis is roughly 5x slower + * than a BasicValue analysis. + * + * See the doc of package object `analysis` for some notes on the performance of alias analysis. 
+ */ class AliasingFrame[V <: Value](nLocals: Int, nStack: Int) extends Frame[V](nLocals, nStack) { import Opcodes._ @@ -24,50 +29,66 @@ class AliasingFrame[V <: Value](nLocals: Int, nStack: Int) extends Frame[V](nLoc } /** - * For each slot (entry in the `values` array of the frame), an id that uniquely represents - * the object stored in it. If two values have the same id, they are aliases of the same - * object. - */ - private val aliasIds: Array[Long] = Array.fill(nLocals + nStack)(AliasingFrame.nextId) - - /** - * The object alias id of for a value index. - */ - def aliasId(entry: Int) = aliasIds(entry) - - /** - * Returns the indices of the values array which are aliases of the object `id`. + * For every value the set of values that are aliases of it. + * + * Invariants: + * - If `aliases(i) == null` then i has no aliases. This is equivalent to having + * `aliases(i) == SingletonSet(i)`. + * - If `aliases(i) != null` then `aliases(i) contains i`. + * - If `aliases(i) contains j` then `aliases(i) eq aliases(j)`, i.e., they are references to the + * same (mutable) AliasSet. */ - def valuesWithAliasId(id: Long): Set[Int] = immutable.BitSet.empty ++ aliasIds.indices.iterator.filter(i => aliasId(i) == id) + val aliases: Array[AliasSet] = new Array[AliasSet](getLocals + getMaxStackSize) /** * The set of aliased values for a given entry in the `values` array. */ - def aliasesOf(entry: Int): Set[Int] = valuesWithAliasId(aliasIds(entry)) + def aliasesOf(entry: Int): AliasSet = { + if (aliases(entry) != null) aliases(entry) + else { + val init = new AliasSet(new AliasSet.SmallBitSet(entry, -1, -1, -1), 1) + aliases(entry) = init + init + } + } /** - * Define a new alias. For example, given - * var a = this // this, a have the same aliasId - * then an assignment + * Define a new alias. For example, an assignment * b = a - * will set the same the aliasId for `b`. + * adds b to the set of aliases of a. 
*/ private def newAlias(assignee: Int, source: Int): Unit = { - aliasIds(assignee) = aliasIds(source) + removeAlias(assignee) + val sourceAliases = aliasesOf(source) + sourceAliases += assignee + aliases(assignee) = sourceAliases } /** - * An assignment + * Remove an alias. For example, an assignment * a = someUnknownValue() - * sets a fresh alias id for `a`. - * A stack value is also removed from its alias set when being consumed. + * removes a from its former alias set. + * As another example, stack values are removed from their alias sets when being consumed. */ private def removeAlias(assignee: Int): Unit = { - aliasIds(assignee) = AliasingFrame.nextId + if (aliases(assignee) != null) { + aliases(assignee) -= assignee + aliases(assignee) = null + } + } + + /** + * Define the alias set for a given value. + */ + private def setAliasSet(assignee: Int, set: AliasSet): Unit = { + if (aliases(assignee) != null) { + aliases(assignee) -= assignee + } + aliases(assignee) = set } override def execute(insn: AbstractInsnNode, interpreter: Interpreter[V]): Unit = { - // Make the extendsion methods easier to use (otherwise we have to repeat `this`.stackTop) + // Make the extension methods easier to use (otherwise we have to repeat `this`.stackTop) def stackTop: Int = this.stackTop def peekStack(n: Int): V = this.peekStack(n) @@ -166,14 +187,34 @@ class AliasingFrame[V <: Value](nLocals: Int, nStack: Int) extends Frame[V](nLoc } case SWAP => + // could be written more elegantly with higher-order combinators, but thinking of performance val top = stackTop - val idTop = aliasIds(top) - aliasIds(top) = aliasIds(top - 1) - aliasIds(top - 1) = idTop + + def moveNextToTop(): Unit = { + val nextAliases = aliases(top - 1) + aliases(top) = nextAliases + nextAliases -= (top - 1) + nextAliases += top + } + + if (aliases(top) != null) { + val topAliases = aliases(top) + if (aliases(top - 1) != null) moveNextToTop() + else aliases(top) = null + // move top to next + aliases(top - 1) = 
topAliases + topAliases -= top + topAliases += (top - 1) + } else { + if (aliases(top - 1) != null) { + moveNextToTop() + aliases(top - 1) = null + } + } case opcode => if (opcode == ASTORE) { - // Not a separate case because we need to remove the consumed stack value from alias sets after. + // not a separate case: we re-use the code below that removes the consumed stack value from alias sets val stackTopBefore = stackTop - produced + consumed val local = insn.asInstanceOf[VarInsnNode].`var` newAlias(assignee = local, source = stackTopBefore) @@ -198,10 +239,22 @@ class AliasingFrame[V <: Value](nLocals: Int, nStack: Int) extends Frame[V](nLoc val firstConsumed = stackTop - produced + 1 // firstConsumed = 3 for (i <- 0 until consumed) removeAlias(firstConsumed + i) // remove aliases for 3 and 4 + } + } - // We don't need to set the aliases ids for the produced values: the aliasIds array already - // contains fresh ids for non-used stack values (ensured by removeAlias). + /** + * When entering an exception handler, all values are dropped from the stack (and the exception + * value is pushed). The ASM analyzer invokes `firstHandlerInstructionFrame.clearStack()`. To + * ensure consistent aliasing sets, we need to remove the dropped values from aliasing sets. + */ + override def clearStack(): Unit = { + var i = getLocals + val end = i + getStackSize + while (i < end) { + removeAlias(i) + i += 1 } + super.clearStack() } /** @@ -217,30 +270,124 @@ class AliasingFrame[V <: Value](nLocals: Int, nStack: Int) extends Frame[V](nLoc * x = a * y = b // (x, a) and (y, b) * } - * [...] // (x, a) + * [...] // (x, a) -- merge of ((x, y, a)) and ((x, a), (y, b)) */ override def merge(other: Frame[_ <: V], interpreter: Interpreter[V]): Boolean = { + // merge is the main performance hot spot of a data flow analysis. + + // in nullness analysis, super.merge (which actually merges the nullness values) takes 20% of + // the overall analysis time. 
val valuesChanged = super.merge(other, interpreter) + + // in nullness analysis, merging the alias sets takes ~55% of the analysis time. therefore, this + // code has been heavily optimized. most of the time is spent in the `hasNext` method of the + // andNotIterator, see its comment. + var aliasesChanged = false val aliasingOther = other.asInstanceOf[AliasingFrame[_]] - for (i <- aliasIds.indices) { - val thisAliases = aliasesOf(i) - val thisNotOther = thisAliases diff (thisAliases intersect aliasingOther.aliasesOf(i)) - if (thisNotOther.nonEmpty) { - aliasesChanged = true - thisNotOther foreach removeAlias + + val numValues = getLocals + getStackSize + // assume (a, b) are aliases both in this frame, and the other frame. when merging the alias set + // for a, we already see that a and b will be aliases in the final result. so we can skip over + // merging the alias set for b. in this case, while merging the sets for a, knownOk(b) will be + // set to `true`. + val knownOk = new Array[Boolean](numValues) + var i = 0 + while (i < numValues) { + if (!knownOk(i)) { + val thisAliases = this.aliases(i) + val otherAliases = aliasingOther.aliases(i) + if (thisAliases != null && otherAliases != null) { + // The iterator yields elements that are in `thisAliases` but not in `otherAliases`. + // As a side-effect, for every index `i` that is in both alias sets, the iterator sets + // `knownOk(i) = true`: the alias sets for these values don't need to be merged again. 
+ val thisNotOtherIt = AliasSet.andNotIterator(thisAliases, otherAliases, knownOk) + if (thisNotOtherIt.hasNext) { + aliasesChanged = true + val newSet = AliasSet.empty + while (thisNotOtherIt.hasNext) { + val next = thisNotOtherIt.next() + newSet += next + setAliasSet(next, newSet) + } + } + } } + i += 1 } + valuesChanged || aliasesChanged } + private def min(s: SmallBitSet) = { + var r = s.a + if ( s.b < r) r = s.b + if (s.c != -1 && s.c < r) r = s.c + if (s.d != -1 && s.d < r) r = s.d + r + } + override def init(src: Frame[_ <: V]): Frame[V] = { - super.init(src) - compat.Platform.arraycopy(src.asInstanceOf[AliasingFrame[_]].aliasIds, 0, aliasIds, 0, aliasIds.length) + super.init(src) // very quick (just an arraycopy) + System.arraycopy(src.asInstanceOf[AliasingFrame[_]].aliases, 0, aliases, 0, aliases.length) // also quick + + val newSets = mutable.HashMap.empty[AliasSet, AliasSet] + + // the rest of this method (cloning alias sets) is the second performance˙hotspot (next to + // AliasingFrame.merge). for nullness, it takes ~20% of the analysis time. + // the difficulty here is that we have to clone the alias sets correctly. if two values a, b are + // aliases, then aliases(a) eq aliases(b). we need to make sure to use the same clone for the + // two values. + + var i = 0 + while (i < aliases.length) { + val set = aliases(i) + if (set != null) { + // size cannot be 0 - alias sets are always at least singletons. 
+ // for sets of size 1-4, don't use the `newSets` map - lookup / update is slow + if (set.size == 1) { + aliases(i) = null + } else if (set.size <= 4) { + val small = set.set.asInstanceOf[AliasSet.SmallBitSet] + val firstOfSet = i == min(small) + if (firstOfSet) { + val newSet = set.clone() + aliases(small.a) = newSet + aliases(small.b) = newSet + if (small.c != -1) aliases(small.c) = newSet + if (small.d != -1) aliases(small.d) = newSet + } + } else { + // the actual hot spot is the hash map operations here: this is where almost all of the 20% + // mentioned above is spent. + // i also benchmarked an alternative implementation: keep an array of booleans for indexes + // that already contain the cloned set. iterate through all elements of the cloned set and + // assign the cloned set. this approach is 50% slower than using a hash map. + if (newSets contains set) aliases(i) = newSets(set) + else { + val newSet = set.clone() + newSets(set) = newSet + aliases(i) = newSet + } + } + } + i += 1 + } this } } +object AliasingFrame { +// val start1 = AliasingFrame.timer1.start() +// AliasingFrame.timer1.stop(start1) + import scala.reflect.internal.util.Statistics._ + val timer1 = newTimer("t1", "jvm") + val timer2 = newTimer("t2", "jvm") + val timer3 = newTimer("t3", "jvm") + val timers = List(timer1, timer2, timer3) + def reset(): Unit = for (t <- timers) { t.nanos = 0; t.timings = 0 } +} + /** * An analyzer that uses AliasingFrames instead of bare Frames. This can be used when an analysis * needs to track aliases, but doesn't require a more specific Frame subclass. @@ -249,3 +396,269 @@ class AliasingAnalyzer[V <: Value](interpreter: Interpreter[V]) extends Analyzer override def newFrame(nLocals: Int, nStack: Int): AliasingFrame[V] = new AliasingFrame(nLocals, nStack) override def newFrame(src: Frame[_ <: V]): AliasingFrame[V] = new AliasingFrame(src) } + +/** + * An iterator over Int (required to prevent boxing the result of next). 
+ */ +abstract class IntIterator extends Iterator[Int] { + def hasNext: Boolean + def next(): Int +} + +/** + * An efficient mutable bit set. + * + * @param set Either a SmallBitSet or an Array[Long] + * @param size The size of the set, useful for performance of certain operations + */ +class AliasSet(var set: Object /*SmallBitSet | Array[Long]*/, var size: Int) { + import AliasSet._ + + override def toString: String = set.toString + + /** + * An iterator for the elements of this bit set. Note that only one iterator can be used at a + * time. Also make sure not to change the underlying AliasSet during iteration. + */ + def iterator: IntIterator = andNotIterator(this, empty, null) + + def +=(value: Int): Unit = this.set match { + case s: SmallBitSet => (size: @switch) match { + case 0 => s.a = value; size = 1 + case 1 => if (value != s.a) { s.b = value; size = 2 } + case 2 => if (value != s.a && value != s.b) { s.c = value; size = 3 } + case 3 => if (value != s.a && value != s.b && value != s.c) { s.d = value; size = 4 } + case 4 => + if (value != s.a && value != s.b && value != s.c && value != s.d) { + this.set = bsEmpty + this.size = 0 + bsAdd(this, s.a) + bsAdd(this, s.b) + bsAdd(this, s.c) + bsAdd(this, s.d) + bsAdd(this, value) + } + } + case bits: Array[Long] => + bsAdd(this, value) + } + + def -=(value: Int): Unit = this.set match { + case s: SmallBitSet => (size: @switch) match { + case 0 => + case 1 => + if (value == s.a) { s.a = -1; size = 0 } + case 2 => + if (value == s.a) { s.a = s.b; s.b = -1; size = 1 } + else if (value == s.b) { s.b = -1; size = 1 } + case 3 => + if (value == s.a) { s.a = s.b; s.b = s.c; s.c = -1; size = 2 } + else if (value == s.b) { s.b = s.c; s.c = -1; size = 2 } + else if (value == s.c) { s.c = -1; size = 2 } + case 4 => + if (value == s.a) { s.a = s.b; s.b = s.c; s.c = s.d; s.d = -1; size = 3 } + else if (value == s.b) { s.b = s.c; s.c = s.d; s.d = -1; size = 3 } + else if (value == s.c) { s.c = s.d; s.d = -1; size = 3 } + else 
if (value == s.d) { s.d = -1; size = 3 } + } + case bits: Array[Long] => + bsRemove(this, value) + if (this.size == 4) + this.set = bsToSmall(this.set.asInstanceOf[Array[Long]]) + } + + override def clone(): AliasSet = { + val resSet = this.set match { + case s: SmallBitSet => new SmallBitSet(s.a, s.b, s.c, s.d) + case bits: Array[Long] => bits.clone() + } + new AliasSet(resSet, this.size) + } +} + +object AliasSet { + def empty = new AliasSet(new SmallBitSet(-1, -1, -1, -1), 0) + + final class SmallBitSet(var a: Int, var b: Int, var c: Int, var d: Int) { + override def toString = s"($a, $b, $c, $d)" + } + + def bsEmpty: Array[Long] = new Array[Long](1) + + private def bsEnsureCapacity(set: Array[Long], index: Int): Array[Long] = { + if (index < set.length) set + else { + var newLength = set.length + while (index >= newLength) newLength *= 2 + val newSet = new Array[Long](newLength) + Array.copy(set, 0, newSet, 0, set.length) + newSet + } + } + + def bsAdd(set: AliasSet, bit: Int): Unit = { + val bits = set.set.asInstanceOf[Array[Long]] + val index = bit >> 6 + val resSet = bsEnsureCapacity(bits, index) + val before = resSet(index) + val result = before | (1l << bit) + if (result != before) { + resSet(index) = result + set.set = resSet + set.size += 1 + } + } + + def bsRemove(set: AliasSet, bit: Int): Unit = { + val bits = set.set.asInstanceOf[Array[Long]] + val index = bit >> 6 + if (index < bits.length) { + val before = bits(index) + val result = before & ~(1l << bit) + if (result != before) { + bits(index) = result + set.size -= 1 + } + } + } + + def bsContains(set: Array[Long], bit: Int): Boolean = { + val index = bit >> 6 + bit >= 0 && index < set.length && (set(index) & (1L << bit)) != 0L + } + +// var sizesHist: Array[Int] = new Array[Int](1000) + + /** + * Convert a bit array to a SmallBitSet. Requires the bit array to contain exactly four bits. 
+ */ + def bsToSmall(bits: Array[Long]): SmallBitSet = { + var a = -1 + var b = -1 + var c = -1 + var i = 0 + val end = bits.length * 64 + while (i < end) { + if (bsContains(bits, i)) { + if (a == -1) a = i + else if (b == -1) b = i + else if (c == -1) c = i + else return new SmallBitSet(a, b, c, i) + } + i += 1 + } + null + } + + /** + * An iterator that yields the elements that are in one bit set and not in another (&~). + */ + private class AndNotIt(setA: AliasSet, setB: AliasSet, thisAndOther: Array[Boolean]) extends IntIterator { + // values in the first bit set + private var a, b, c, d = -1 + private var xs: Array[Long] = null + + // values in the second bit set + private var notA, notB, notC, notD = -1 + private var notXs: Array[Long] = null + + // holds the next value of `x`, `y` or `z` that should be returned. assigned in hasNext + private var abcdNext = -1 + + // counts through elements in the `xs` bit set + private var i = 0 + // true if the current value of `i` should be returned by this iterator + private var iValid = false + + setA.set match { + case s: SmallBitSet => a = s.a; b = s.b; c = s.c; d = s.d + case bits: Array[Long] => xs = bits + } + + setB.set match { + case s: SmallBitSet => notA = s.a; notB = s.b; notC = s.c; notD = s.d + case bits: Array[Long] => notXs = bits + } + + // for each value that exists both in this AND (&) the other bit, `thisAndOther` is set to true. + // hacky side-effect, used for performance of AliasingFrame.merge. + private def setThisAndOther(x: Int) = if (thisAndOther != null) thisAndOther(x) = true + + private def checkABCD(x: Int, num: Int): Boolean = { + // assert(x == a && num == 1 || x == b && num == 2 || ...) 
+ x != -1 && { + val otherHasA = x == notA || x == notB || x == notC || x == notD || (notXs != null && bsContains(notXs, x)) + if (otherHasA) setThisAndOther(x) + else abcdNext = x + (num: @switch) match { + case 1 => a = -1 + case 2 => b = -1 + case 3 => c = -1 + case 4 => d = -1 + } + !otherHasA + } + } + + // main performance hot spot + private def checkXs = { + (xs != null) && { + val end = xs.length * 64 + + while (i < end && { + val index = i >> 6 + if (xs(index) == 0l) { // boom. for nullness, this saves 35% of the overall analysis time. + i = ((index + 1) << 6) - 1 // -1 required because i is incremented in the loop body + true + } else { + val mask = 1l << i + // if (mask > xs(index)) we could also advance i to the next value, but that didn't pay off in benchmarks + val thisHasI = (xs(index) & mask) != 0l + !thisHasI || { + val otherHasI = i == notA || i == notB || i == notC || i == notD || (notXs != null && index < notXs.length && (notXs(index) & mask) != 0l) + if (otherHasI) setThisAndOther(i) + otherHasI + } + } + }) i += 1 + + iValid = i < end + iValid + } + } + + // this is the main hot spot of alias analysis. for nullness, 38% of the overall analysis time + // is spent here. within hasNext, almost the entire time is spent in `checkXs`. + // + def hasNext: Boolean = iValid || abcdNext != -1 || checkABCD(a, 1) || checkABCD(b, 2) || checkABCD(c, 3) || checkABCD(d, 4) || checkXs + + def next(): Int = { + if (hasNext) { + if (abcdNext != -1) { + val r = abcdNext; abcdNext = -1; r + } else { + val r = i; i += 1; iValid = false; r + } + } else Iterator.empty.next() + } + } + +// The number of bits in a bit array. Useful for debugging. +// def bsSize(bits: Array[Long]) = { +// var r = 0 +// var i = 0 +// while (i < bits.length) { +// r += java.lang.Long.bitCount(bits(i)) +// i += 1 +// } +// r +// } + + /** + * An iterator returning the elements in a that are not also in b (a &~ b). 
+ * + * If `thisAndOther` is non-null, the iterator sets thisAndOther(i) to true for every value that + * is both in a and b (&). + */ + def andNotIterator(a: AliasSet, b: AliasSet, thisAndOther: Array[Boolean]): IntIterator = new AndNotIt(a, b, thisAndOther) +} diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/Analyzers.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/Analyzers.scala new file mode 100644 index 0000000000..bb5c6e3820 --- /dev/null +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/Analyzers.scala @@ -0,0 +1,48 @@ +package scala.tools.nsc +package backend.jvm +package analysis + +import scala.tools.asm.tree.{AbstractInsnNode, MethodNode} +import scala.tools.asm.tree.analysis.{Frame, BasicInterpreter, Analyzer, Value} +import scala.tools.nsc.backend.jvm.BTypes._ +import scala.tools.nsc.backend.jvm.opt.BytecodeUtils._ + +/** + * This component hosts tools for running ASM analyzers that require access to a `BTypes` instance. + * In particular, the AsmAnalyzer class runs `computeMaxLocalsMaxStack` on the methodNode to be + * analyzed. This method in turn lives inside the BTypes assembly because it queries the per-run + * cache `maxLocalsMaxStackComputed` defined in there. + */ +class Analyzers[BT <: BTypes](val btypes: BT) { + import btypes._ + + /** + * A wrapper to make ASM's Analyzer a bit easier to use. + */ + class AsmAnalyzer[V <: Value](methodNode: MethodNode, classInternalName: InternalName, val analyzer: Analyzer[V] = new Analyzer(new BasicInterpreter)) { + localOpt.computeMaxLocalsMaxStack(methodNode) + analyzer.analyze(classInternalName, methodNode) + def frameAt(instruction: AbstractInsnNode): Frame[V] = analyzer.frameAt(instruction, methodNode) + } + + /** + * See the doc comment on package object `analysis` for a discussion on performance. 
+ */ + object AsmAnalyzer { + // jvm limit is 65535 for both number of instructions and number of locals + + private def size(method: MethodNode) = method.instructions.size.toLong * method.maxLocals * method.maxLocals + + // with the limits below, analysis should not take more than one second + + private val nullnessSizeLimit = 5000l * 600l * 600l // 5000 insns, 600 locals + private val basicValueSizeLimit = 9000l * 1000l * 1000l + private val sourceValueSizeLimit = 8000l * 950l * 950l + + def sizeOKForNullness(method: MethodNode): Boolean = size(method) < nullnessSizeLimit + def sizeOKForBasicValue(method: MethodNode): Boolean = size(method) < basicValueSizeLimit + def sizeOKForSourceValue(method: MethodNode): Boolean = size(method) < sourceValueSizeLimit + } + + class ProdConsAnalyzer(val methodNode: MethodNode, classInternalName: InternalName) extends AsmAnalyzer(methodNode, classInternalName, new Analyzer(new InitialProducerSourceInterpreter)) with ProdConsAnalyzerImpl +} diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzer.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzer.scala index 31b62f747e..f9ac12eb4d 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzer.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzer.scala @@ -7,66 +7,12 @@ import java.util import scala.annotation.switch import scala.tools.asm.{Type, Opcodes} import scala.tools.asm.tree.{MethodInsnNode, LdcInsnNode, AbstractInsnNode} -import scala.tools.asm.tree.analysis.{Frame, Analyzer, Interpreter, Value} +import scala.tools.asm.tree.analysis._ import scala.tools.nsc.backend.jvm.opt.BytecodeUtils import BytecodeUtils._ /** - * Some notes on the ASM analyzer framework. - * - * Value - * - Abstract, needs to be implemented for each analysis. - * - Represents the desired information about local variables and stack values, for example: - * - Is this value known to be null / not null? 
- * - What are the instructions that could potentially have produced this value? - * - * Interpreter - * - Abstract, needs to be implemented for each analysis. Sometimes one can subclass an existing - * interpreter, e.g., SourceInterpreter or BasicInterpreter. - * - Multiple abstract methods that receive an instruction and the instruction's input values, and - * return a value representing the result of that instruction. - * - Note: due to control flow, the interpreter can be invoked multiple times for the same - * instruction, until reaching a fixed point. - * - Abstract `merge` function that computes the least upper bound of two values. Used by - * Frame.merge (see below). - * - * Frame - * - Can be used directly for many analyses, no subclass required. - * - Every frame has an array of values: one for each local variable and for each stack slot. - * - A `top` index stores the index of the current stack top - * - NOTE: for a size-2 local variable at index i, the local variable at i+1 is set to an empty - * value. However, for a size-2 value at index i on the stack, the value at i+1 holds the next - * stack value. - * - Defines the `execute(instruction)` method. - * - executing mutates the state of the frame according to the effect of the instruction - * - pop consumed values from the stack - * - pass them to the interpreter together with the instruction - * - if applicable, push the resulting value on the stack - * - Defines the `merge(otherFrame)` method - * - called by the analyzer when multiple control flow paths lead to an instruction - * - the frame at the branching instruction is merged into the current frame of the - * instruction (held by the analyzer) - * - mutates the values of the current frame, merges all values using interpreter.merge. 
- * - * Analyzer - * - Stores a frame for each instruction - * - `merge` function takes an instruction and a frame, merges the existing frame for that instr - * (from the frames array) with the new frame passed as argument. - * if the frame changed, puts the instruction on the work queue (fixpiont). - * - initial frame: initialized for first instr by calling interpreter.new[...]Value - * for each slot (locals and params), stored in frames[firstInstr] by calling `merge` - * - work queue of instructions (`queue` array, `top` index for next instruction to analyze) - * - analyze(method): simulate control flow. while work queue non-empty: - * - copy the state of `frames[instr]` into a local frame `current` - * - call `current.execute(instr, interpreter)`, mutating the `current` frame - * - if it's a branching instruction - * - for all potential destination instructions - * - merge the destination instruction frame with the `current` frame - * (this enqueues the destination instr if its frame changed) - * - invoke `newControlFlowEdge` (see below) - * - the analyzer also tracks active exception handlers at each instruction - * - the empty method `newControlFlowEdge` can be overridden to track control flow if required - * + * See the package object `analysis` for details on the ASM analysis framework. * * Some notes on nullness analysis. * @@ -87,56 +33,34 @@ import BytecodeUtils._ */ /** - * Type to represent nullness of values. - */ -sealed trait Nullness { - final def merge(other: Nullness) = if (this == other) this else Unknown -} -case object NotNull extends Nullness -case object Unknown extends Nullness -case object Null extends Nullness - -/** * Represents the nullness state for a local variable or stack value. * - * Note that nullness of primitive values is not tracked, it will be always [[Unknown]]. + * Note that nullness of primitive values is not tracked, it will be always unknown. 
*/ -sealed trait NullnessValue extends Value { - /** - * The nullness of this value. - */ - def nullness: Nullness - - /** - * True if this value is a long or double. The Analyzer framework needs to know - * the size of each value when interpreting instructions, see `Frame.execute`. - */ - def isSize2: Boolean +sealed abstract class NullnessValue(final val isSize2: Boolean) extends Value { /** * The size of the slot described by this value. Cannot be 0 because no values are allocated * for void-typed slots, see NullnessInterpreter.newValue. **/ def getSize: Int = if (isSize2) 2 else 1 - def merge(other: NullnessValue) = NullnessValue(nullness merge other.nullness, isSize2) + def merge(other: NullnessValue) = { + if (this eq other) this + else if (this eq UnknownValue2) this // the only possible value of size two + else UnknownValue1 + } + + final override def equals(other: Any) = this eq other.asInstanceOf[Object] } -object NullValue extends NullnessValue { def nullness = Null; def isSize2 = false; override def toString = "Null" } -object UnknownValue1 extends NullnessValue { def nullness = Unknown; def isSize2 = false; override def toString = "Unknown1" } -object UnknownValue2 extends NullnessValue { def nullness = Unknown; def isSize2 = true; override def toString = "Unknown2" } -object NotNullValue extends NullnessValue { def nullness = NotNull; def isSize2 = false; override def toString = "NotNull" } +object NullValue extends NullnessValue(isSize2 = false) { override def toString = "Null" } +object UnknownValue1 extends NullnessValue(isSize2 = false) { override def toString = "Unknown1" } +object UnknownValue2 extends NullnessValue(isSize2 = true ) { override def toString = "Unknown2" } +object NotNullValue extends NullnessValue(isSize2 = false) { override def toString = "NotNull" } object NullnessValue { - def apply(nullness: Nullness, isSize2: Boolean): NullnessValue = { - if (nullness == Null) NullValue - else if (nullness == NotNull) NotNullValue - else if 
(isSize2) UnknownValue2 - else UnknownValue1 - } - - def apply(nullness: Nullness, insn: AbstractInsnNode): NullnessValue = { - apply(nullness, isSize2 = BytecodeUtils.instructionResultSize(insn) == 2) - } + def unknown(isSize2: Boolean) = if (isSize2) UnknownValue2 else UnknownValue1 + def unknown(insn: AbstractInsnNode) = if (BytecodeUtils.instructionResultSize(insn) == 2) UnknownValue2 else UnknownValue1 } final class NullnessInterpreter extends Interpreter[NullnessValue](Opcodes.ASM5) { @@ -151,29 +75,25 @@ final class NullnessInterpreter extends Interpreter[NullnessValue](Opcodes.ASM5) // (2) `tp` may also be `null`. When creating the initial frame, the analyzer invokes // `newValue(null)` for each local variable. We have to return a value of size 1. if (tp == Type.VOID_TYPE) null // (1) - else NullnessValue(Unknown, isSize2 = tp != null /*(2)*/ && tp.getSize == 2 ) + else NullnessValue.unknown(isSize2 = tp != null /*(2)*/ && tp.getSize == 2 ) } override def newParameterValue(isInstanceMethod: Boolean, local: Int, tp: Type): NullnessValue = { // For instance methods, the `this` parameter is known to be not null. - if (isInstanceMethod && local == 0) NullnessValue(NotNull, isSize2 = false) + if (isInstanceMethod && local == 0) NotNullValue else super.newParameterValue(isInstanceMethod, local, tp) } - def newOperation(insn: AbstractInsnNode): NullnessValue = { - val nullness = (insn.getOpcode: @switch) match { - case Opcodes.ACONST_NULL => Null + def newOperation(insn: AbstractInsnNode): NullnessValue = (insn.getOpcode: @switch) match { + case Opcodes.ACONST_NULL => NullValue - case Opcodes.LDC => insn.asInstanceOf[LdcInsnNode].cst match { - case _: String | _: Type => NotNull - case _ => Unknown - } - - case _ => Unknown + case Opcodes.LDC => insn.asInstanceOf[LdcInsnNode].cst match { + case _: String | _: Type => NotNullValue + case _ => NullnessValue.unknown(insn) } // for Opcodes.NEW, we use Unknown. The value will become NotNull after the constructor call. 
- NullnessValue(nullness, insn) + case _ => NullnessValue.unknown(insn) } def copyOperation(insn: AbstractInsnNode, value: NullnessValue): NullnessValue = value @@ -182,26 +102,24 @@ final class NullnessInterpreter extends Interpreter[NullnessValue](Opcodes.ASM5) case Opcodes.CHECKCAST => value case Opcodes.NEWARRAY | - Opcodes.ANEWARRAY => NullnessValue(NotNull, isSize2 = false) + Opcodes.ANEWARRAY => NotNullValue - case _ => NullnessValue(Unknown, insn) + case _ => NullnessValue.unknown(insn) } def binaryOperation(insn: AbstractInsnNode, value1: NullnessValue, value2: NullnessValue): NullnessValue = { - NullnessValue(Unknown, insn) + NullnessValue.unknown(insn) } - def ternaryOperation(insn: AbstractInsnNode, value1: NullnessValue, value2: NullnessValue, value3: NullnessValue): NullnessValue = { - NullnessValue(Unknown, isSize2 = false) - } + def ternaryOperation(insn: AbstractInsnNode, value1: NullnessValue, value2: NullnessValue, value3: NullnessValue): NullnessValue = UnknownValue1 def naryOperation(insn: AbstractInsnNode, values: util.List[_ <: NullnessValue]): NullnessValue = (insn.getOpcode: @switch) match { case Opcodes.MULTIANEWARRAY => - NullnessValue(NotNull, isSize2 = false) + NotNullValue case _ => // TODO: use a list of methods that are known to return non-null values - NullnessValue(Unknown, insn) + NullnessValue.unknown(insn) } def returnOperation(insn: AbstractInsnNode, value: NullnessValue, expected: NullnessValue): Unit = () @@ -219,8 +137,10 @@ class NullnessFrame(nLocals: Int, nStack: Int) extends AliasingFrame[NullnessVal override def execute(insn: AbstractInsnNode, interpreter: Interpreter[NullnessValue]): Unit = { import Opcodes._ - // get the object id of the object that is known to be not-null after this operation - val nullCheckedAliasId: Long = (insn.getOpcode: @switch) match { + // get the alias set the object that is known to be not-null after this operation. 
+ // alias sets are mutable / mutated, so after super.execute, this set contains the remaining + // aliases of the value that becomes not-null. + val nullCheckedAliases: AliasSet = (insn.getOpcode: @switch) match { case IALOAD | LALOAD | FALOAD | @@ -229,7 +149,7 @@ class NullnessFrame(nLocals: Int, nStack: Int) extends AliasingFrame[NullnessVal BALOAD | CALOAD | SALOAD => - aliasId(this.stackTop - 1) + aliasesOf(this.stackTop - 1) case IASTORE | FASTORE | @@ -239,35 +159,36 @@ class NullnessFrame(nLocals: Int, nStack: Int) extends AliasingFrame[NullnessVal SASTORE | LASTORE | DASTORE => - aliasId(this.stackTop - 2) + aliasesOf(this.stackTop - 2) case GETFIELD => - aliasId(this.stackTop) + aliasesOf(this.stackTop) case PUTFIELD => - aliasId(this.stackTop - 1) + aliasesOf(this.stackTop - 1) case INVOKEVIRTUAL | INVOKESPECIAL | INVOKEINTERFACE => val desc = insn.asInstanceOf[MethodInsnNode].desc val numArgs = Type.getArgumentTypes(desc).length - aliasId(this.stackTop - numArgs) + aliasesOf(this.stackTop - numArgs) case ARRAYLENGTH | MONITORENTER | MONITOREXIT => - aliasId(this.stackTop) + aliasesOf(this.stackTop) case _ => - -1 + null } super.execute(insn, interpreter) - if (nullCheckedAliasId != -1) { - for (i <- valuesWithAliasId(nullCheckedAliasId)) - this.setValue(i, NotNullValue) + if (nullCheckedAliases != null) { + val it = nullCheckedAliases.iterator + while (it.hasNext) + this.setValue(it.next(), NotNullValue) } } } diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzer.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzerImpl.scala index 1c24acba03..ce2fe943e4 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzer.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzerImpl.scala @@ -55,24 +55,16 @@ import scala.collection.convert.decorateAsScala._ * * If ever needed, we could introduce a mode where primitive conversions (l2i) are considered as * copying operations. 
+ * + * Note on performance: the data flow analysis (SourceValue / SourceInterpreter, provided by ASM) + * is roughly 2-3x slower than a simple analysis (like BasicValue). The reason is that the merge + * function (merging producer sets) is more complex than merging simple basic values. + * See also the doc comment in the package object `analysis`. */ -class ProdConsAnalyzer(methodNode: MethodNode, classInternalName: InternalName) { - - /* Timers for benchmarking ProdCons - import scala.reflect.internal.util.Statistics._ - import ProdConsAnalyzer._ - val analyzerTimer = newSubTimer(classInternalName + "#" + methodNode.name + " - analysis", prodConsAnalyzerTimer) - val consumersTimer = newSubTimer(classInternalName + "#" + methodNode.name + " - consumers", prodConsAnalyzerTimer) - */ - - val analyzer = new Analyzer(new InitialProducerSourceInterpreter) +trait ProdConsAnalyzerImpl { + val methodNode: MethodNode -// val start = analyzerTimer.start() - analyzer.analyze(classInternalName, methodNode) -// analyzerTimer.stop(start) -// println(analyzerTimer.line) - - def frameAt(insn: AbstractInsnNode) = analyzer.frameAt(insn, methodNode) + def frameAt(insn: AbstractInsnNode): Frame[SourceValue] /** * Returns the potential producer instructions of a (local or stack) value in the frame of `insn`. @@ -404,7 +396,6 @@ class ProdConsAnalyzer(methodNode: MethodNode, classInternalName: InternalName) /** For each instruction, a set of potential consumers of the produced values. 
*/ private lazy val _consumersOfOutputsFrom: Map[AbstractInsnNode, Vector[Set[AbstractInsnNode]]] = { -// val start = consumersTimer.start() var res = Map.empty[AbstractInsnNode, Vector[Set[AbstractInsnNode]]] for { insn <- methodNode.instructions.iterator.asScala @@ -417,8 +408,6 @@ class ProdConsAnalyzer(methodNode: MethodNode, classInternalName: InternalName) val outputIndex = producedSlots.indexOf(i) res = res.updated(producer, currentConsumers.updated(outputIndex, currentConsumers(outputIndex) + insn)) } -// consumersTimer.stop(start) -// println(consumersTimer.line) res } @@ -426,11 +415,6 @@ class ProdConsAnalyzer(methodNode: MethodNode, classInternalName: InternalName) private val _ultimateConsumersCache: mutable.AnyRefMap[(AbstractInsnNode, Int), Set[AbstractInsnNode]] = mutable.AnyRefMap.empty } -object ProdConsAnalyzer { - import scala.reflect.internal.util.Statistics._ - val prodConsAnalyzerTimer = newTimer("Time in ProdConsAnalyzer", "jvm") -} - /** * A class for pseudo-instructions representing the initial producers of local values that have * no producer instruction in the method: diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/package.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/package.scala new file mode 100644 index 0000000000..f1ac703532 --- /dev/null +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/package.scala @@ -0,0 +1,374 @@ +package scala.tools.nsc.backend.jvm + +/** + * Summary on the ASM analyzer framework + * -------------------------------------- + * + * Value + * - Abstract, needs to be implemented for each analysis. + * - Represents the desired information about local variables and stack values, for example: + * - Is this value known to be null / not null? + * - What are the instructions that could potentially have produced this value? + * + * Interpreter + * - Abstract, needs to be implemented for each analysis. 
Sometimes one can subclass an existing + * interpreter, e.g., SourceInterpreter or BasicInterpreter. + * - Multiple abstract methods that receive an instruction and the instruction's input values, and + * return a value representing the result of that instruction. + * - Note: due to control flow, the interpreter can be invoked multiple times for the same + * instruction, until reaching a fixed point. + * - Abstract `merge` function that computes the least upper bound of two values. Used by + * Frame.merge (see below). + * + * Frame + * - Can be used directly for many analyses, no subclass required. + * - Every frame has an array of values: one for each local variable and for each stack slot. + * - A `top` index stores the index of the current stack top + * - NOTE: for a size-2 local variable at index i, the local variable at i+1 is set to an empty + * value. However, for a size-2 value at index i on the stack, the value at i+1 holds the next + * stack value. IMPORTANT: this is only the case in ASM's analysis framework, not in bytecode. + * See comment below. + * - Defines the `execute(instruction)` method. + * - executing mutates the state of the frame according to the effect of the instruction + * - pop consumed values from the stack + * - pass them to the interpreter together with the instruction + * - if applicable, push the resulting value on the stack + * - Defines the `merge(otherFrame)` method + * - called by the analyzer when multiple control flow paths lead to an instruction + * - the frame at the branching instruction is merged into the current frame of the + * instruction (held by the analyzer) + * - mutates the values of the current frame, merges all values using interpreter.merge. + * + * Analyzer + * - Stores a frame for each instruction + * - `merge` function takes an instruction and a frame, merges the existing frame for that instr + * (from the frames array) with the new frame passed as argument. 
+ * if the frame changed, puts the instruction on the work queue (fixpoint). + * - initial frame: initialized for first instr by calling interpreter.new[...]Value + * for each slot (locals and params), stored in frames[firstInstr] by calling `merge` + * - work queue of instructions (`queue` array, `top` index for next instruction to analyze) + * - analyze(method): simulate control flow. while work queue non-empty: + * - copy the state of `frames[instr]` into a local frame `current` + * - call `current.execute(instr, interpreter)`, mutating the `current` frame + * - if it's a branching instruction + * - for all potential destination instructions + * - merge the destination instruction frame with the `current` frame + * (this enqueues the destination instr if its frame changed) + * - invoke `newControlFlowEdge` (see below) + * - the analyzer also tracks active exception handlers at each instruction + * - the empty method `newControlFlowEdge` can be overridden to track control flow if required + * + * + * MaxLocals and MaxStack + * ---------------------- + * + * At the JVM level, long and double values occupy two slots, both as local variables and on the + * stack, as specified in the JVM spec 2.6.2: + * "At any point in time, an operand stack has an associated depth, where a value of type long or + * double contributes two units to the depth and a value of any other type contributes one unit." + * + * For example, a method + * class A { def f(a: Long, b: Long) = a + b } + * has MAXSTACK=4 in the classfile. This value is computed by the ClassWriter / MethodWriter when + * generating the classfile (we always pass COMPUTE_MAXS to the ClassWriter). 
+ * + * For running an ASM Analyzer, long and double values occupy two local variable slots, but only + * a single slot on the call stack, as shown by the following snippet: + * + * import scala.tools.nsc.backend.jvm._ + * import scala.tools.nsc.backend.jvm.opt.BytecodeUtils._ + * import scala.collection.convert.decorateAsScala._ + * import scala.tools.asm.tree.analysis._ + * + * val cn = AsmUtils.readClass("/Users/luc/scala/scala/sandbox/A.class") + * val m = cn.methods.iterator.asScala.find(_.name == "f").head + * + * // the value is read from the classfile, so it's 4 + * println(s"maxLocals: ${m.maxLocals}, maxStack: ${m.maxStack}") // maxLocals: 5, maxStack: 4 + * + * // we can safely set it to 2 for running the analyzer. + * m.maxStack = 2 + * + * val a = new Analyzer(new BasicInterpreter) + * a.analyze(cn.name, m) + * val addInsn = m.instructions.iterator.asScala.find(_.getOpcode == 97).get // LADD Opcode + * val addFrame = a.frameAt(addInsn, m) + * + * addFrame.getStackSize // 2: the two long values only take one slot each + * addFrame.getLocals // 5: this takes one slot, the two long parameters take 2 slots each + * + * + * While running the optimizer, we need to make sure that the `maxStack` value of a method is + * large enough for running an ASM analyzer. We don't need to worry if the value is incorrect in + * the JVM perspective: the value will be re-computed and overwritten in the ClassWriter. + * + * + * Lessons learnt while benchmarking the alias tracking analysis + * ------------------------------------------------------------- + * + * Profiling + * - Use YourKit for finding hotspots (cpu profiling). when it comes to drilling down into the details + * of a hotspot, don't pay too much attention to the percentages / time counts. + * - Should also try other profilers. + * - Use timers. When a method showed up as a hotspot, i added a timer around that method, and a + * second one within the method to measure specific parts. 
The timers slow things down, but the + * relative numbers show what parts of a method are slow. + * + * ASM analyzer insights + * - The time for running an analysis depends on the number of locals and the number of instructions. + * Reducing the number of locals helps speed up the analysis: there are fewer values to + * merge when merging two frames. + * See also https://github.com/scala/scala-dev/issues/47 + * - The common hot spot of an ASM analysis is Frame.merge, for example in producers / consumers. + * - For nullness analysis the time is spent as follows + * - 20% merging nullness values. This is as expected: for example, the same absolute amount of + * time is spent in merging BasicValues when running a BasicInterpreter. + * - 50% merging alias sets. I tried to optimize what I could out of this. + * - 20% is spent creating new frames from existing ones, see comment on AliasingFrame.init. + * - The implementation of Frame.merge (the main hot spot) contains a megamorphic callsite to + * `interpreter.merge`. This can be observed easily by running a test program that either runs + * a BasicValue analysis only, versus a program that first runs a nullness analysis and then + * a BasicValue. In an example, the time for the BasicValue analysis goes from 519ms to 1963ms, + * a 3.8x slowdown. + * - I added counters to the Frame.merge methods for nullness and BasicValue analysis. In the + * examples I benchmarked, the number of merge invocations was always exactly the same. + * It would probably be possible to come up with an example where alias set merging forces + * additional analysis rounds until reaching the fixpoint, but I did not observe such cases. + * + * To benchmark an analysis, instead of benchmarking analysis while it runs in the compiler + * backend, one can easily run it from a separate program (or the repl). The bytecode to analyze + * can simply be parsed from a classfile. See example at the end of this comment. 
+ * + * + * Nullness Analysis in Miguel's Optimizer + * --------------------------------------- + * + * Miguel implemented alias tracking for nullness analysis differently [1]. Remember that every + * frame has an array of values. Miguel's idea was to represent aliasing using reference equality + * in the values array: if two entries in the array point to the same value object, the two entries + * are aliases in the frame of the given instruction. + * + * While this idea seems elegant at first sight, Miguel's implementation does not merge frames + * correctly when it comes to aliasing. Assume in frame 1, values (a, b, c) are aliases, while in + * frame 2 (a, b) are aliases. When merging the second into the first, we have to make sure that + * c is removed as an alias of (a, b). + * + * It would be possible to implement correct alias set merging in Miguel's approach. However, frame + * merging is the main hot spot of analysis. The computational complexity of implementing alias set + * merging by traversing the values array and comparing references is too high. The concrete + * alias set representation that is used in the current implementation (see class AliasingFrame) + * makes alias set merging more efficient. + * + * [1] https://github.com/scala-opt/scala/blob/opt/rebase/src/compiler/scala/tools/nsc/backend/bcode/NullnessPropagator.java + * + * + * Complexity and scaling of analysis + * ---------------------------------- + * + * The time complexity of a data flow analysis depends on: + * + * - The size of the method. The complexity factor is linear (assuming the number of locals and + * branching instructions remains constant). The main analysis loop runs through all + * instructions of a method once. Instructions are only re-enqueued if a control flow merge + * changes the frame at some instruction. + * + * - The branching instructions. When a second (third, ..) 
control flow edge arrives at an + * instruction, the existing frame at the instruction is merged with the one computed on the + * new branch. If the merge function changes the existing frame, the instruction is enqueued + * for another analysis. This results in a merge operation for the successors of the + * instruction. + * + * - The number of local variables. The hot spot of analysis is frame merging. The merge function + * iterates through the values in the frame (locals and stack values) and merges them. + * + * I measured the running time of an analysis for two examples: + * - Keep the number of locals and branching instructions constant, increase the number of + * instructions. The running time grows linearly with the method size. + * - Increase the size and number of locals in a method. The method size and number of locals + * grow at the same pace. Here, the running time increase is polynomial. It looks like the + * complexity is #instructions * #locals^2 (see below). + * + * I measured nullness analysis (which tracks aliases) and a SimpleValue analysis. Nullness runs + * roughly 5x slower (because of alias tracking) at every problem size - this factor doesn't change. + * + * The numbers below are for nullness. Note that the last column is constant, i.e., the running + * time is proportional to #ins * #loc^2. Therefore we use this factor when limiting the maximal + * method size for running an analysis. + * + * #insns #locals time (ms) time / #ins * #loc^2 * 10^6 + * 1305 156 34 1.07 + * 2610 311 165 0.65 + * 3915 466 490 0.57 + * 5220 621 1200 0.59 + * 6525 776 2220 0.56 + * 7830 931 3830 0.56 + * 9135 1086 6570 0.60 + * 10440 1241 9700 0.60 + * 11745 1396 13800 0.60 + * + * As a second experiment, nullness analysis was run with varying #insns but constant #locals. 
+ * The last column shows linear complexity with respect to the method size (linearOffset = 2279): + * + * #insns #locals time (ms) (time + linearOffset) / #insns + * 5220 621 1090 0.645 + * 6224 621 1690 0.637 + * 7226 621 2280 0.630 + * 8228 621 2870 0.625 + * 9230 621 3530 0.629 + * 10232 621 4130 0.626 + * 11234 621 4770 0.627 + * 12236 621 5520 0.637 + * 13238 621 6170 0.638 + * + * + * When running a BasicValue analysis, the complexity observation is the same (time is proportional + * to #ins * #loc^2). + * + * + * Measuring analysis execution time + * --------------------------------- + * + * See code below. + */ + +/* +object Test { + val overwrite: Option[String] = null + + @noinline def serialize(o: AnyRef): String = null + + @noinline def deserialize(string: String): AnyRef = null + + @inline def checkRoundTrip[T <: AnyRef](instance: T)(f: T => AnyRef) { + val result = serialize(instance) + val reconstituted = deserialize(result).asInstanceOf[T] + assert(f(instance) == f(reconstituted), (f(instance), f(reconstituted))) + } + + @inline def check[T <: AnyRef](instance: => T)(prevResult: String, f: T => AnyRef = (x: T) => x) { + // pattern match to introduce a lot of control flow, i.e., a lot of frame merges + overwrite match { + case Some(f) => + case None => + checkRoundTrip(instance)(f) + assert(f(deserialize(prevResult).asInstanceOf[T]) == f(instance), instance) + assert(prevResult == "res", instance) + } + } + + // @inline def fun[T <: AnyRef](instance: => T) = (x: T) => x + + def testMain(): Unit = { + // every call to check creates quite a number of locals, and also quite a number of aliases + // of the same value (x1). First of all, the default argument call is expanded as below. Then + // method check is inlined, and within the body of check, checkRoundTrip and assert have + // already been inlined as well. 
+ + // { + // val x1 = () => "" + // val x2 = fun(x1()) // the compiler optimizes this: instead of passing `() => x1()`, it just passes x1 + // check(x1())("", x2) // same here for x1 + // } + + check("")("") + check("")("") + check("")("") + check("")("") + check("")("") // 5 + check("")("") + check("")("") + check("")("") + check("")("") + check("")("") // 10 + check("")("") + check("")("") + check("")("") + check("")("") + check("")("") // 15 + check("")("") + check("")("") + check("")("") + check("")("") + check("")("") // 20 + check("")("") + check("")("") + check("")("") + check("")("") + check("")("") // 25 + check("")("") + check("")("") + check("")("") + check("")("") + check("")("") // 30 + check("")("") + check("")("") + check("")("") + check("")("") + check("")("") // 35 + check("")("") + check("")("") + check("")("") + check("")("") + check("")("") // 40 + // check("")("") + // check("")("") + // check("")("") + // check("")("") + // check("")("") // 45 + // check("")("") + // check("")("") + // check("")("") + // check("")("") + // check("")("") // 50 + // check("")("") + // check("")("") + // check("")("") + // check("")("") + // check("")("") // 55 + + // 1000 bytecode instructions, 0 locals + // println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); 
println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); + } + + def timed[T](f: => T): T = { + val start = System.nanoTime() + val r = f + val nanos = System.nanoTime() - start + println(s"took ${nanos/1000000}ms") + r + } + + def main(args: Array[String]): Unit = { + import scala.tools.nsc.backend.jvm._ + val cn = AsmUtils.readClass("/Users/luc/scala/scala/sandbox/Test$.class") + import scala.collection.convert.decorateAsScala._ + val m = cn.methods.iterator.asScala.find(_.name == "testMain").head + + println(s"${m.instructions.size} instructions - ${m.maxLocals} locals") + + val a = new analysis.NullnessAnalyzer + a.analyze(cn.name, m) // warm up + + analysis.AliasingFrame.reset() + timed(a.analyze(cn.name, m)) + analysis.AliasingFrame.timers foreach println + + println("---") + + // NOTE: if we don't run nullness analysis above (comment it out), then the BasicValue + // analysis runs 3.5x faster. Most likely because the call to Interpreter.merge inside + // Frame.merge is no longer megamorphic. 
+ + import scala.tools.asm.tree.analysis._ + val ba = new Analyzer(new BasicInterpreter) + ba.analyze(cn.name, m) // warm up + + timed(ba.analyze(cn.name, m)) + + println("---") + + timed(a.analyze(cn.name, m)) + } +} +*/ +package object analysis diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/ByteCodeRepository.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/ByteCodeRepository.scala index a5b85e54e7..4492d0baf5 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/ByteCodeRepository.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/ByteCodeRepository.scala @@ -10,6 +10,7 @@ package opt import scala.tools.asm import asm.tree._ import scala.collection.convert.decorateAsScala._ +import scala.collection.concurrent import scala.tools.asm.Attribute import scala.tools.nsc.backend.jvm.BackendReporting._ import scala.tools.nsc.io.AbstractFile @@ -24,39 +25,52 @@ import java.util.concurrent.atomic.AtomicLong * classpath. Parsed classes are cached in the `classes` map. * * @param classPath The compiler classpath where classfiles are searched and read from. - * @param classes Cache for parsed ClassNodes. Also stores the source of the bytecode: - * [[Classfile]] if read from `classPath`, [[CompilationUnit]] if the bytecode - * corresponds to a class being compiled. - * The `Long` field encodes the age of the node in the map, which allows removing - * old entries when the map grows too large. - * For Java classes in mixed compilation, the map contains an error message: no - * ClassNode is generated by the backend and also no classfile that could be parsed. 
*/ -class ByteCodeRepository(val classPath: ClassFileLookup[AbstractFile], val isJavaSourceDefined: InternalName => Boolean, val classes: collection.concurrent.Map[InternalName, Either[ClassNotFound, (ClassNode, Source, Long)]]) { +class ByteCodeRepository[BT <: BTypes](val classPath: ClassFileLookup[AbstractFile], val btypes: BT) { + import btypes._ + + /** + * ClassNodes for classes being compiled in the current compilation run. + */ + val compilingClasses: concurrent.Map[InternalName, ClassNode] = recordPerRunCache(concurrent.TrieMap.empty) + + /** + * Cache for parsed ClassNodes. + * The `Long` field encodes the age of the node in the map, which allows removing old entries when + * the map grows too large (see limitCacheSize). + * For Java classes in mixed compilation, the map contains an error message: no ClassNode is + * generated by the backend and also no classfile that could be parsed. + */ + val parsedClasses: concurrent.Map[InternalName, Either[ClassNotFound, (ClassNode, Long)]] = recordPerRunCache(concurrent.TrieMap.empty) private val maxCacheSize = 1500 private val targetSize = 500 - private val idCounter = new AtomicLong(0) + private object lruCounter extends AtomicLong(0l) with collection.generic.Clearable { + def clear(): Unit = { this.set(0l) } + } + recordPerRunCache(lruCounter) /** * Prevent the code repository from growing too large. Profiling reveals that the average size * of a ClassNode is about 30 kb. I observed having 17k+ classes in the cache, i.e., 500 mb. - * - * We can only remove classes with `Source == Classfile`, those can be parsed again if requested. 
*/ private def limitCacheSize(): Unit = { - if (classes.count(c => c._2.isRight && c._2.right.get._2 == Classfile) > maxCacheSize) { - val removeId = idCounter.get - targetSize - val toRemove = classes.iterator.collect({ - case (name, Right((_, Classfile, id))) if id < removeId => name - }).toList - toRemove foreach classes.remove + if (parsedClasses.size > maxCacheSize) { + // OK if multiple threads get here + val minimalLRU = parsedClasses.valuesIterator.collect({ + case Right((_, lru)) => lru + }).toList.sorted(Ordering.Long.reverse).drop(targetSize).headOption.getOrElse(Long.MaxValue) + parsedClasses retain { + case (_, Right((_, lru))) => lru > minimalLRU + case _ => false + } } } def add(classNode: ClassNode, source: Source) = { - classes(classNode.name) = Right((classNode, source, idCounter.incrementAndGet())) + if (source == CompilationUnit) compilingClasses(classNode.name) = classNode + else parsedClasses(classNode.name) = Right((classNode, lruCounter.incrementAndGet())) } /** @@ -64,18 +78,32 @@ class ByteCodeRepository(val classPath: ClassFileLookup[AbstractFile], val isJav * parsed from the classfile on the compile classpath. */ def classNodeAndSource(internalName: InternalName): Either[ClassNotFound, (ClassNode, Source)] = { - val r = classes.getOrElseUpdate(internalName, { - limitCacheSize() - parseClass(internalName).map((_, Classfile, idCounter.incrementAndGet())) + classNode(internalName) map (n => { + val source = if (compilingClasses contains internalName) CompilationUnit else Classfile + (n, source) }) - r.map(v => (v._1, v._2)) } /** * The class node for an internal name. If the class node is not yet available, it is parsed from * the classfile on the compile classpath. 
*/ - def classNode(internalName: InternalName): Either[ClassNotFound, ClassNode] = classNodeAndSource(internalName).map(_._1) + def classNode(internalName: InternalName): Either[ClassNotFound, ClassNode] = { + compilingClasses.get(internalName).map(Right(_)) getOrElse { + val r = parsedClasses.get(internalName) match { + case Some(l @ Left(_)) => l + case Some(r @ Right((classNode, _))) => + parsedClasses(internalName) = Right((classNode, lruCounter.incrementAndGet())) + r + case None => + limitCacheSize() + val res = parseClass(internalName).map((_, lruCounter.incrementAndGet())) + parsedClasses(internalName) = res + res + } + r.map(_._1) + } + } /** * The field node for a field matching `name` and `descriptor`, accessed in class `classInternalName`. @@ -86,7 +114,6 @@ class ByteCodeRepository(val classPath: ClassFileLookup[AbstractFile], val isJav */ def fieldNode(classInternalName: InternalName, name: String, descriptor: String): Either[FieldNotFound, (FieldNode, InternalName)] = { def fieldNodeImpl(parent: InternalName): Either[FieldNotFound, (FieldNode, InternalName)] = { - def msg = s"The field node $name$descriptor could not be found in class $classInternalName or any of its superclasses." classNode(parent) match { case Left(e) => Left(FieldNotFound(name, descriptor, classInternalName, Some(e))) case Right(c) => @@ -105,6 +132,11 @@ class ByteCodeRepository(val classPath: ClassFileLookup[AbstractFile], val isJav * The method node for a method matching `name` and `descriptor`, accessed in class `ownerInternalNameOrArrayDescriptor`. * The declaration of the method may be in one of the parents. * + * TODO: make sure we always return the right method, the one being invoked. write tests. + * - if there's an abstract and a concrete one. could possibly somehow the abstract be returned? + * - with traits and default methods, if there is more than one default method inherited and + * no override: what should be returned? We should not just inline one of the two. 
+ * * @return The [[MethodNode]] of the requested method and the [[InternalName]] of its declaring * class, or an error message if the method could not be found. */ @@ -157,7 +189,7 @@ class ByteCodeRepository(val classPath: ClassFileLookup[AbstractFile], val isJav classNode } match { case Some(node) => Right(node) - case None => Left(ClassNotFound(internalName, isJavaSourceDefined(internalName))) + case None => Left(ClassNotFound(internalName, javaDefinedClasses(internalName))) } } } diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/BytecodeUtils.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/BytecodeUtils.scala index df8dcc690a..ea186f9a1b 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/BytecodeUtils.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/BytecodeUtils.scala @@ -173,6 +173,11 @@ object BytecodeUtils { case Opcodes.IFNONNULL => Opcodes.IFNULL } + def isSize2LoadOrStore(opcode: Int): Boolean = (opcode: @switch) match { + case Opcodes.LLOAD | Opcodes.DLOAD | Opcodes.LSTORE | Opcodes.DSTORE => true + case _ => false + } + def getPop(size: Int): InsnNode = { val op = if (size == 1) Opcodes.POP else Opcodes.POP2 new InsnNode(op) @@ -222,29 +227,6 @@ object BytecodeUtils { } } - /** - * In order to run an Analyzer, the maxLocals / maxStack fields need to be available. The ASM - * framework only computes these values during bytecode generation. - * - * Since there's currently no better way, we run a bytecode generator on the method and extract - * the computed values. This required changes to the ASM codebase: - * - the [[MethodWriter]] class was made public - * - accessors for maxLocals / maxStack were added to the MethodWriter class - * - * We could probably make this faster (and allocate less memory) by hacking the ASM framework - * more: create a subclass of MethodWriter with a /dev/null byteVector. 
Another option would be - * to create a separate visitor for computing those values, duplicating the functionality from the - * MethodWriter. - */ - def computeMaxLocalsMaxStack(method: MethodNode): Unit = { - val cw = new ClassWriter(ClassWriter.COMPUTE_MAXS) - val excs = method.exceptions.asScala.toArray - val mw = cw.visitMethod(method.access, method.name, method.desc, method.signature, excs).asInstanceOf[MethodWriter] - method.accept(mw) - method.maxLocals = mw.getMaxLocals - method.maxStack = mw.getMaxStack - } - def codeSizeOKForInlining(caller: MethodNode, callee: MethodNode): Boolean = { // Looking at the implementation of CodeSizeEvaluator, all instructions except tableswitch and // lookupswitch are <= 8 bytes. These should be rare enough for 8 to be an OK rough upper bound. @@ -308,14 +290,14 @@ object BytecodeUtils { * Clone the local variable descriptors of `methodNode` and map their `start` and `end` labels * according to the `labelMap`. */ - def cloneLocalVariableNodes(methodNode: MethodNode, labelMap: Map[LabelNode, LabelNode], prefix: String): List[LocalVariableNode] = { + def cloneLocalVariableNodes(methodNode: MethodNode, labelMap: Map[LabelNode, LabelNode], prefix: String, shift: Int): List[LocalVariableNode] = { methodNode.localVariables.iterator().asScala.map(localVariable => new LocalVariableNode( prefix + localVariable.name, localVariable.desc, localVariable.signature, labelMap(localVariable.start), labelMap(localVariable.end), - localVariable.index + localVariable.index + shift )).toList } @@ -352,15 +334,6 @@ object BytecodeUtils { } } - /** - * A wrapper to make ASM's Analyzer a bit easier to use. 
- */ - class AsmAnalyzer[V <: Value](methodNode: MethodNode, classInternalName: InternalName, interpreter: Interpreter[V] = new BasicInterpreter) { - val analyzer = new Analyzer(interpreter) - analyzer.analyze(classInternalName, methodNode) - def frameAt(instruction: AbstractInsnNode): Frame[V] = analyzer.frameAt(instruction, methodNode) - } - implicit class AnalyzerExtensions[V <: Value](val analyzer: Analyzer[V]) extends AnyVal { def frameAt(instruction: AbstractInsnNode, methodNode: MethodNode): Frame[V] = analyzer.getFrames()(methodNode.instructions.indexOf(instruction)) } diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala index 96455c0e38..b9788c3f56 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala @@ -7,104 +7,92 @@ package scala.tools.nsc package backend.jvm package opt +import scala.collection.immutable.IntMap import scala.reflect.internal.util.{NoPosition, Position} -import scala.tools.asm.tree.analysis.{Value, Analyzer, BasicInterpreter} import scala.tools.asm.{Opcodes, Type, Handle} import scala.tools.asm.tree._ -import scala.collection.concurrent +import scala.collection.{concurrent, mutable} import scala.collection.convert.decorateAsScala._ import scala.tools.nsc.backend.jvm.BTypes.InternalName import scala.tools.nsc.backend.jvm.BackendReporting._ -import scala.tools.nsc.backend.jvm.analysis.{NotNull, NullnessAnalyzer} +import scala.tools.nsc.backend.jvm.analysis._ import ByteCodeRepository.{Source, CompilationUnit} import BytecodeUtils._ class CallGraph[BT <: BTypes](val btypes: BT) { import btypes._ + import analyzers._ - val callsites: concurrent.Map[MethodInsnNode, Callsite] = recordPerRunCache(concurrent.TrieMap.empty) - - val closureInstantiations: concurrent.Map[InvokeDynamicInsnNode, ClosureInstantiation] = recordPerRunCache(concurrent.TrieMap.empty) - - def 
addClass(classNode: ClassNode): Unit = { - val classType = classBTypeFromClassNode(classNode) - for { - m <- classNode.methods.asScala - (calls, closureInits) = analyzeCallsites(m, classType) - } { - calls foreach (callsite => callsites(callsite.callsiteInstruction) = callsite) - closureInits foreach (lmf => closureInstantiations(lmf.indy) = ClosureInstantiation(lmf, m, classType)) - } - } + /** + * The call graph contains the callsites in the program being compiled. + * + * Indexing the call graph by the containing MethodNode and the invocation MethodInsnNode allows + * finding callsites efficiently. For example, an inlining heuristic might want to know all + * callsites withing a callee method. + * + * Note that the call graph is not guaranteed to be complete: callsites may be missing. In + * particular, if a method is very large, all of its callsites might not be in the hash map. + * The reason is that adding a method to the call graph requires running an ASM analyzer, which + * can be too slow. + * + * Note that call graph entries (Callsite instances) keep a reference to the invocation + * MethodInsnNode, which keeps all AbstractInsnNodes of the method reachable. Adding classes + * from the classpath to the call graph (in addition to classes being compiled) may prevent + * method instruction nodes from being GCd. The ByteCodeRepository has a fixed size cache for + * parsed ClassNodes - keeping all ClassNodes alive consumed too much memory. + * The call graph is less problematic because only methods being called are kept alive, not entire + * classes. But we should keep an eye on this. + */ + val callsites: mutable.Map[MethodNode, Map[MethodInsnNode, Callsite]] = recordPerRunCache(concurrent.TrieMap.empty withDefaultValue Map.empty) /** - * Returns a list of callsites in the method, plus a list of closure instantiation indy instructions. + * Closure instantiations in the program being compiled. 
+ * + * Indexing closure instantiations by the containing MethodNode is beneficial for the closure + * optimizer: finding callsites to re-write requires running a producers-consumers analysis on + * the method. Here the closure instantiations are already grouped by method. */ - def analyzeCallsites(methodNode: MethodNode, definingClass: ClassBType): (List[Callsite], List[LambdaMetaFactoryCall]) = { + val closureInstantiations: mutable.Map[MethodNode, Map[InvokeDynamicInsnNode, ClosureInstantiation]] = recordPerRunCache(concurrent.TrieMap.empty withDefaultValue Map.empty) + + def removeCallsite(invocation: MethodInsnNode, methodNode: MethodNode): Option[Callsite] = { + val methodCallsites = callsites(methodNode) + val newCallsites = methodCallsites - invocation + if (newCallsites.isEmpty) callsites.remove(methodNode) + else callsites(methodNode) = newCallsites + methodCallsites.get(invocation) + } - case class CallsiteInfo(safeToInline: Boolean, safeToRewrite: Boolean, - annotatedInline: Boolean, annotatedNoInline: Boolean, - warning: Option[CalleeInfoWarning]) + def addCallsite(callsite: Callsite): Unit = { + val methodCallsites = callsites(callsite.callsiteMethod) + callsites(callsite.callsiteMethod) = methodCallsites + (callsite.callsiteInstruction -> callsite) + } - /** - * Analyze a callsite and gather meta-data that can be used for inlining decisions. - */ - def analyzeCallsite(calleeMethodNode: MethodNode, calleeDeclarationClassBType: ClassBType, receiverTypeInternalName: InternalName, calleeSource: Source): CallsiteInfo = { - val methodSignature = calleeMethodNode.name + calleeMethodNode.desc - - try { - // The inlineInfo.methodInfos of a ClassBType holds an InlineInfo for each method *declared* - // within a class (not for inherited methods). Since we already have the classBType of the - // callee, we only check there for the methodInlineInfo, we should find it there. 
- calleeDeclarationClassBType.info.orThrow.inlineInfo.methodInfos.get(methodSignature) match { - case Some(methodInlineInfo) => - val canInlineFromSource = compilerSettings.YoptInlineGlobal || calleeSource == CompilationUnit - - val isAbstract = BytecodeUtils.isAbstractMethod(calleeMethodNode) - - // (1) A non-final method can be safe to inline if the receiver type is a final subclass. Example: - // class A { @inline def f = 1 }; object B extends A; B.f // can be inlined - // - // TODO: type analysis can render more calls statically resolved. Example: - // new A.f // can be inlined, the receiver type is known to be exactly A. - val isStaticallyResolved: Boolean = { - methodInlineInfo.effectivelyFinal || - classBTypeFromParsedClassfile(receiverTypeInternalName).info.orThrow.inlineInfo.isEffectivelyFinal // (1) - } + def removeClosureInstantiation(indy: InvokeDynamicInsnNode, methodNode: MethodNode): Option[ClosureInstantiation] = { + val methodClosureInits = closureInstantiations(methodNode) + val newClosureInits = methodClosureInits - indy + if (newClosureInits.isEmpty) closureInstantiations.remove(methodNode) + else closureInstantiations(methodNode) = newClosureInits + methodClosureInits.get(indy) + } - val isRewritableTraitCall = isStaticallyResolved && methodInlineInfo.traitMethodWithStaticImplementation + def addClosureInstantiation(closureInit: ClosureInstantiation) = { + val methodClosureInits = closureInstantiations(closureInit.ownerMethod) + closureInstantiations(closureInit.ownerMethod) = methodClosureInits + (closureInit.lambdaMetaFactoryCall.indy -> closureInit) + } - val warning = calleeDeclarationClassBType.info.orThrow.inlineInfo.warning.map( - MethodInlineInfoIncomplete(calleeDeclarationClassBType.internalName, calleeMethodNode.name, calleeMethodNode.desc, _)) + def addClass(classNode: ClassNode): Unit = { + val classType = classBTypeFromClassNode(classNode) + classNode.methods.asScala.foreach(addMethod(_, classType)) + } - // (1) For invocations of 
final trait methods, the callee isStaticallyResolved but also - // abstract. Such a callee is not safe to inline - it needs to be re-written to the - // static impl method first (safeToRewrite). - // (2) Final trait methods can be rewritten from the interface to the static implementation - // method to enable inlining. - CallsiteInfo( - safeToInline = - canInlineFromSource && - isStaticallyResolved && // (1) - !isAbstract && - !BytecodeUtils.isConstructor(calleeMethodNode) && - !BytecodeUtils.isNativeMethod(calleeMethodNode), - safeToRewrite = canInlineFromSource && isRewritableTraitCall, // (2) - annotatedInline = methodInlineInfo.annotatedInline, - annotatedNoInline = methodInlineInfo.annotatedNoInline, - warning = warning) - - case None => - val warning = MethodInlineInfoMissing(calleeDeclarationClassBType.internalName, calleeMethodNode.name, calleeMethodNode.desc, calleeDeclarationClassBType.info.orThrow.inlineInfo.warning) - CallsiteInfo(false, false, false, false, Some(warning)) - } - } catch { - case Invalid(noInfo: NoClassBTypeInfo) => - val warning = MethodInlineInfoError(calleeDeclarationClassBType.internalName, calleeMethodNode.name, calleeMethodNode.desc, noInfo) - CallsiteInfo(false, false, false, false, Some(warning)) - } - } + def addIfMissing(methodNode: MethodNode, definingClass: ClassBType): Unit = { + if (!callsites.contains(methodNode)) addMethod(methodNode, definingClass) + } + /** + * Returns a list of callsites in the method, plus a list of closure instantiation indy instructions. + */ + def addMethod(methodNode: MethodNode, definingClass: ClassBType): Unit = { // TODO: run dataflow analyses to make the call graph more precise // - producers to get forwarded parameters (ForwardedParam) // - typeAnalysis for more precise argument types, more precise callee @@ -112,77 +100,236 @@ class CallGraph[BT <: BTypes](val btypes: BT) { // For now we run a NullnessAnalyzer. 
It is used to determine if the receiver of an instance // call is known to be not-null, in which case we don't have to emit a null check when inlining. // It is also used to get the stack height at the call site. - localOpt.minimalRemoveUnreachableCode(methodNode, definingClass.internalName) - val analyzer: Analyzer[_ <: Value] = { - if (compilerSettings.YoptNullnessTracking) new NullnessAnalyzer - else new Analyzer(new BasicInterpreter) + val analyzer = { + if (compilerSettings.YoptNullnessTracking && AsmAnalyzer.sizeOKForNullness(methodNode)) { + Some(new AsmAnalyzer(methodNode, definingClass.internalName, new NullnessAnalyzer)) + } else if (AsmAnalyzer.sizeOKForBasicValue(methodNode)) { + Some(new AsmAnalyzer(methodNode, definingClass.internalName)) + } else None } - analyzer.analyze(definingClass.internalName, methodNode) - def receiverNotNullByAnalysis(call: MethodInsnNode, numArgs: Int) = analyzer match { - case nullnessAnalyzer: NullnessAnalyzer => - val frame = nullnessAnalyzer.frameAt(call, methodNode) - frame.getStack(frame.getStackSize - 1 - numArgs).nullness == NotNull + // if the method is too large to run an analyzer, it is not added to the call graph + if (analyzer.nonEmpty) { + val Some(a) = analyzer + def receiverNotNullByAnalysis(call: MethodInsnNode, numArgs: Int) = a.analyzer match { + case nullnessAnalyzer: NullnessAnalyzer => + val frame = nullnessAnalyzer.frameAt(call, methodNode) + frame.getStack(frame.getStackSize - 1 - numArgs) eq NotNullValue + case _ => false + } + + var methodCallsites = Map.empty[MethodInsnNode, Callsite] + var methodClosureInstantiations = Map.empty[InvokeDynamicInsnNode, ClosureInstantiation] + + // lazy so it is only computed if actually used by computeArgInfos + lazy val prodCons = new ProdConsAnalyzer(methodNode, definingClass.internalName) + + methodNode.instructions.iterator.asScala foreach { + case call: MethodInsnNode if a.frameAt(call) != null => // skips over unreachable code + val callee: 
Either[OptimizerWarning, Callee] = for { + (method, declarationClass) <- byteCodeRepository.methodNode(call.owner, call.name, call.desc): Either[OptimizerWarning, (MethodNode, InternalName)] + (declarationClassNode, source) <- byteCodeRepository.classNodeAndSource(declarationClass): Either[OptimizerWarning, (ClassNode, Source)] + declarationClassBType = classBTypeFromClassNode(declarationClassNode) + } yield { + val CallsiteInfo(safeToInline, safeToRewrite, annotatedInline, annotatedNoInline, samParamTypes, warning) = analyzeCallsite(method, declarationClassBType, call.owner, source) + Callee( + callee = method, + calleeDeclarationClass = declarationClassBType, + safeToInline = safeToInline, + safeToRewrite = safeToRewrite, + annotatedInline = annotatedInline, + annotatedNoInline = annotatedNoInline, + samParamTypes = samParamTypes, + calleeInfoWarning = warning) + } - case _ => false + val argInfos = computeArgInfos(callee, call, prodCons) + + val receiverNotNull = call.getOpcode == Opcodes.INVOKESTATIC || { + val numArgs = Type.getArgumentTypes(call.desc).length + receiverNotNullByAnalysis(call, numArgs) + } + + methodCallsites += call -> Callsite( + callsiteInstruction = call, + callsiteMethod = methodNode, + callsiteClass = definingClass, + callee = callee, + argInfos = argInfos, + callsiteStackHeight = a.frameAt(call).getStackSize, + receiverKnownNotNull = receiverNotNull, + callsitePosition = callsitePositions.getOrElse(call, NoPosition) + ) + + case LambdaMetaFactoryCall(indy, samMethodType, implMethod, instantiatedMethodType) if a.frameAt(indy) != null => + val lmf = LambdaMetaFactoryCall(indy, samMethodType, implMethod, instantiatedMethodType) + val capturedArgInfos = computeCapturedArgInfos(lmf, prodCons) + methodClosureInstantiations += indy -> ClosureInstantiation( + lmf, + methodNode, + definingClass, + capturedArgInfos) + + case _ => + } + + callsites(methodNode) = methodCallsites + closureInstantiations(methodNode) = methodClosureInstantiations } + } 
- val callsites = new collection.mutable.ListBuffer[Callsite] - val closureInstantiations = new collection.mutable.ListBuffer[LambdaMetaFactoryCall] - - methodNode.instructions.iterator.asScala foreach { - case call: MethodInsnNode => - val callee: Either[OptimizerWarning, Callee] = for { - (method, declarationClass) <- byteCodeRepository.methodNode(call.owner, call.name, call.desc): Either[OptimizerWarning, (MethodNode, InternalName)] - (declarationClassNode, source) <- byteCodeRepository.classNodeAndSource(declarationClass): Either[OptimizerWarning, (ClassNode, Source)] - declarationClassBType = classBTypeFromClassNode(declarationClassNode) - } yield { - val CallsiteInfo(safeToInline, safeToRewrite, annotatedInline, annotatedNoInline, warning) = analyzeCallsite(method, declarationClassBType, call.owner, source) - Callee( - callee = method, - calleeDeclarationClass = declarationClassBType, - safeToInline = safeToInline, - safeToRewrite = safeToRewrite, - annotatedInline = annotatedInline, - annotatedNoInline = annotatedNoInline, - calleeInfoWarning = warning) - } + def computeArgInfos(callee: Either[OptimizerWarning, Callee], callsiteInsn: MethodInsnNode, prodCons: => ProdConsAnalyzer): IntMap[ArgInfo] = { + if (callee.isLeft) IntMap.empty + else { + lazy val numArgs = Type.getArgumentTypes(callsiteInsn.desc).length + (if (callsiteInsn.getOpcode == Opcodes.INVOKESTATIC) 0 else 1) + argInfosForSams(callee.get.samParamTypes, callsiteInsn, numArgs, prodCons) + } + } - val argInfos = if (callee.isLeft) Nil else { - // TODO: for now it's Nil, because we don't run any data flow analysis - // there's no point in using the parameter types, that doesn't add any information. 
- // NOTE: need to run the same analyses after inlining, to re-compute the argInfos for the - // new duplicated callsites, see Inliner.inline - Nil - } + def computeCapturedArgInfos(lmf: LambdaMetaFactoryCall, prodCons: => ProdConsAnalyzer): IntMap[ArgInfo] = { + val capturedSams = capturedSamTypes(lmf) + val numCaptures = Type.getArgumentTypes(lmf.indy.desc).length + argInfosForSams(capturedSams, lmf.indy, numCaptures, prodCons) + } - val receiverNotNull = call.getOpcode == Opcodes.INVOKESTATIC || { - val numArgs = Type.getArgumentTypes(call.desc).length - receiverNotNullByAnalysis(call, numArgs) + private def argInfosForSams(sams: IntMap[ClassBType], consumerInsn: AbstractInsnNode, numConsumed: => Int, prodCons: => ProdConsAnalyzer): IntMap[ArgInfo] = { + // TODO: use type analysis instead of ProdCons - should be more efficient + // some random thoughts: + // - assign special types to parameters and indy-lambda-functions to track them + // - upcast should not change type flow analysis: don't lose information. + // - can we do something about factory calls? Foo(x) for case class foo gives a Foo. + // inline the factory? analysis across method boundary? 
+ + // assign to a lazy val to prevent repeated evaluation of the by-name arg + lazy val prodConsI = prodCons + lazy val firstConsumedSlot = { + val consumerFrame = prodConsI.frameAt(consumerInsn) + consumerFrame.stackTop - numConsumed + 1 + } + sams flatMap { + case (index, _) => + val prods = prodConsI.initialProducersForValueAt(consumerInsn, firstConsumedSlot + index) + if (prods.size != 1) None + else { + val argInfo = prods.head match { + case LambdaMetaFactoryCall(_, _, _, _) => Some(FunctionLiteral) + case ParameterProducer(local) => Some(ForwardedParam(local)) + case _ => None + } + argInfo.map((index, _)) } + } + } - callsites += Callsite( - callsiteInstruction = call, - callsiteMethod = methodNode, - callsiteClass = definingClass, - callee = callee, - argInfos = argInfos, - callsiteStackHeight = analyzer.frameAt(call, methodNode).getStackSize, - receiverKnownNotNull = receiverNotNull, - callsitePosition = callsitePositions.getOrElse(call, NoPosition) - ) - - case LambdaMetaFactoryCall(indy, samMethodType, implMethod, instantiatedMethodType) => - closureInstantiations += LambdaMetaFactoryCall(indy, samMethodType, implMethod, instantiatedMethodType) - - case _ => + def samParamTypes(methodNode: MethodNode, receiverType: ClassBType): IntMap[ClassBType] = { + val paramTypes = { + val params = Type.getMethodType(methodNode.desc).getArgumentTypes.map(t => bTypeForDescriptorOrInternalNameFromClassfile(t.getDescriptor)) + val isStatic = BytecodeUtils.isStaticMethod(methodNode) + if (isStatic) params else receiverType +: params } + samTypes(paramTypes) + } - (callsites.toList, closureInstantiations.toList) + def capturedSamTypes(lmf: LambdaMetaFactoryCall): IntMap[ClassBType] = { + val capturedTypes = Type.getArgumentTypes(lmf.indy.desc).map(t => bTypeForDescriptorOrInternalNameFromClassfile(t.getDescriptor)) + samTypes(capturedTypes) + } + + private def samTypes(types: Array[BType]): IntMap[ClassBType] = { + var res = IntMap.empty[ClassBType] + for (i <- 
types.indices) { + types(i) match { + case c: ClassBType => + if (c.info.get.inlineInfo.sam.isDefined) res = res.updated(i, c) + + case _ => + } + } + res } /** + * Just a named tuple used as return type of `analyzeCallsite`. + */ + private case class CallsiteInfo(safeToInline: Boolean, safeToRewrite: Boolean, + annotatedInline: Boolean, annotatedNoInline: Boolean, + samParamTypes: IntMap[ClassBType], + warning: Option[CalleeInfoWarning]) + + /** + * Analyze a callsite and gather meta-data that can be used for inlining decisions. + */ + private def analyzeCallsite(calleeMethodNode: MethodNode, calleeDeclarationClassBType: ClassBType, receiverTypeInternalName: InternalName, calleeSource: Source): CallsiteInfo = { + val methodSignature = calleeMethodNode.name + calleeMethodNode.desc + + try { + // The inlineInfo.methodInfos of a ClassBType holds an InlineInfo for each method *declared* + // within a class (not for inherited methods). Since we already have the classBType of the + // callee, we only check there for the methodInlineInfo, we should find it there. + calleeDeclarationClassBType.info.orThrow.inlineInfo.methodInfos.get(methodSignature) match { + case Some(methodInlineInfo) => + val canInlineFromSource = compilerSettings.YoptInlineGlobal || calleeSource == CompilationUnit + + val isAbstract = BytecodeUtils.isAbstractMethod(calleeMethodNode) + + val receiverType = classBTypeFromParsedClassfile(receiverTypeInternalName) + // (1) A non-final method can be safe to inline if the receiver type is a final subclass. Example: + // class A { @inline def f = 1 }; object B extends A; B.f // can be inlined + // + // TODO: (1) doesn't cover the following example: + // trait TravLike { def map = ... } + // sealed trait List extends TravLike { ... 
 } // assume map is not overridden + final case class :: / final case object Nil + (l: List).map // can be inlined + we need to know that + - the receiver is sealed + - what are the children of the receiver + - all children are final + - none of the children overrides map + // + // TODO: type analysis can render more calls statically resolved. Example: + // new A.f // can be inlined, the receiver type is known to be exactly A. + val isStaticallyResolved: Boolean = { + methodInlineInfo.effectivelyFinal || + receiverType.info.orThrow.inlineInfo.isEffectivelyFinal // (1) + } + + val isRewritableTraitCall = isStaticallyResolved && methodInlineInfo.traitMethodWithStaticImplementation + + val warning = calleeDeclarationClassBType.info.orThrow.inlineInfo.warning.map( + MethodInlineInfoIncomplete(calleeDeclarationClassBType.internalName, calleeMethodNode.name, calleeMethodNode.desc, _)) + + // (1) For invocations of final trait methods, the callee isStaticallyResolved but also + // abstract. Such a callee is not safe to inline - it needs to be re-written to the + // static impl method first (safeToRewrite). + // (2) Final trait methods can be rewritten from the interface to the static implementation + // method to enable inlining. 
+ CallsiteInfo( + safeToInline = + canInlineFromSource && + isStaticallyResolved && // (1) + !isAbstract && + !BytecodeUtils.isConstructor(calleeMethodNode) && + !BytecodeUtils.isNativeMethod(calleeMethodNode), + safeToRewrite = canInlineFromSource && isRewritableTraitCall, // (2) + annotatedInline = methodInlineInfo.annotatedInline, + annotatedNoInline = methodInlineInfo.annotatedNoInline, + samParamTypes = samParamTypes(calleeMethodNode, receiverType), + warning = warning) + + case None => + val warning = MethodInlineInfoMissing(calleeDeclarationClassBType.internalName, calleeMethodNode.name, calleeMethodNode.desc, calleeDeclarationClassBType.info.orThrow.inlineInfo.warning) + CallsiteInfo(false, false, false, false, IntMap.empty, Some(warning)) + } + } catch { + case Invalid(noInfo: NoClassBTypeInfo) => + val warning = MethodInlineInfoError(calleeDeclarationClassBType.internalName, calleeMethodNode.name, calleeMethodNode.desc, noInfo) + CallsiteInfo(false, false, false, false, IntMap.empty, Some(warning)) + } + } + + /** * A callsite in the call graph. * * @param callsiteInstruction The invocation instruction @@ -197,7 +344,7 @@ class CallGraph[BT <: BTypes](val btypes: BT) { * @param callsitePosition The source position of the callsite, used for inliner warnings. */ final case class Callsite(callsiteInstruction: MethodInsnNode, callsiteMethod: MethodNode, callsiteClass: ClassBType, - callee: Either[OptimizerWarning, Callee], argInfos: List[ArgInfo], + callee: Either[OptimizerWarning, Callee], argInfos: IntMap[ArgInfo], callsiteStackHeight: Int, receiverKnownNotNull: Boolean, callsitePosition: Position) { override def toString = "Invocation of" + @@ -210,8 +357,9 @@ class CallGraph[BT <: BTypes](val btypes: BT) { * Information about invocation arguments, obtained through data flow analysis of the callsite method. 
*/ sealed trait ArgInfo - final case class ArgTypeInfo(argType: BType, isPrecise: Boolean, knownNotNull: Boolean) extends ArgInfo + case object FunctionLiteral extends ArgInfo final case class ForwardedParam(index: Int) extends ArgInfo + // final case class ArgTypeInfo(argType: BType, isPrecise: Boolean, knownNotNull: Boolean) extends ArgInfo // can be extended, e.g., with constant types /** @@ -227,17 +375,29 @@ class CallGraph[BT <: BTypes](val btypes: BT) { * that can be safely re-written to the static implementation method. * @param annotatedInline True if the callee is annotated @inline * @param annotatedNoInline True if the callee is annotated @noinline + * @param samParamTypes A map from parameter positions to SAM parameter types * @param calleeInfoWarning An inliner warning if some information was not available while * gathering the information about this callee. */ final case class Callee(callee: MethodNode, calleeDeclarationClass: ClassBType, safeToInline: Boolean, safeToRewrite: Boolean, annotatedInline: Boolean, annotatedNoInline: Boolean, + samParamTypes: IntMap[ClassBType], calleeInfoWarning: Option[CalleeInfoWarning]) { assert(!(safeToInline && safeToRewrite), s"A callee of ${callee.name} can be either safeToInline or safeToRewrite, but not both.") + override def toString = s"Callee($calleeDeclarationClass.${callee.name})" } - final case class ClosureInstantiation(lambdaMetaFactoryCall: LambdaMetaFactoryCall, ownerMethod: MethodNode, ownerClass: ClassBType) { + /** + * Metadata about a closure instantiation, stored in the call graph + * + * @param lambdaMetaFactoryCall the InvokeDynamic instruction + * @param ownerMethod the method where the closure is allocated + * @param ownerClass the class containing the above method + * @param capturedArgInfos information about captured arguments. Used for updating the call + * graph when re-writing a closure invocation to the body method. 
+ */ + final case class ClosureInstantiation(lambdaMetaFactoryCall: LambdaMetaFactoryCall, ownerMethod: MethodNode, ownerClass: ClassBType, capturedArgInfos: IntMap[ArgInfo]) { override def toString = s"ClosureInstantiation($lambdaMetaFactoryCall, ${ownerMethod.name + ownerMethod.desc}, $ownerClass)" } final case class LambdaMetaFactoryCall(indy: InvokeDynamicInsnNode, samMethodType: Type, implMethod: Handle, instantiatedMethodType: Type) diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala index b0dc6ead1b..fb7dd16909 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala @@ -9,11 +9,11 @@ package opt import scala.annotation.switch import scala.collection.immutable +import scala.collection.immutable.IntMap import scala.reflect.internal.util.NoPosition import scala.tools.asm.{Type, Opcodes} import scala.tools.asm.tree._ import scala.tools.nsc.backend.jvm.BTypes.InternalName -import scala.tools.nsc.backend.jvm.analysis.ProdConsAnalyzer import BytecodeUtils._ import BackendReporting._ import Opcodes._ @@ -23,6 +23,7 @@ import scala.collection.convert.decorateAsScala._ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { import btypes._ import callGraph._ + import analyzers._ /** * If a closure is allocated and invoked within the same method, re-write the invocation to the @@ -70,24 +71,19 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { } } - // Grouping the closure instantiations by method allows running the ProdConsAnalyzer only once per - // method. Also sort the instantiations: If there are multiple closure instantiations in a method, - // closure invocations need to be re-written in a consistent order for bytecode stability. The local - // variable slots for storing captured values depends on the order of rewriting. 
- val closureInstantiationsByMethod: Map[MethodNode, immutable.TreeSet[ClosureInstantiation]] = { - closureInstantiations.values.groupBy(_.ownerMethod).mapValues(immutable.TreeSet.empty ++ _) - } - // For each closure instantiation, a list of callsites of the closure that can be re-written // If a callsite cannot be rewritten, for example because the lambda body method is not accessible, // a warning is returned instead. val callsitesToRewrite: List[(ClosureInstantiation, List[Either[RewriteClosureApplyToClosureBodyFailed, (MethodInsnNode, Int)]])] = { - closureInstantiationsByMethod.iterator.flatMap({ + closureInstantiations.iterator.flatMap({ case (methodNode, closureInits) => // A lazy val to ensure the analysis only runs if necessary (the value is passed by name to `closureCallsites`) - lazy val prodCons = new ProdConsAnalyzer(methodNode, closureInits.head.ownerClass.internalName) - closureInits.iterator.map(init => (init, closureCallsites(init, prodCons))) - }).toList // mapping to a list (not a map) to keep the sorting of closureInstantiationsByMethod + // We don't need to worry about the method being too large for running an analysis: large + // methods are not added to the call graph / closureInstantiations map. 
+ lazy val prodCons = new ProdConsAnalyzer(methodNode, closureInits.valuesIterator.next().ownerClass.internalName) + val sortedInits = immutable.TreeSet.empty ++ closureInits.values + sortedInits.iterator.map(init => (init, closureCallsites(init, prodCons))).filter(_._2.nonEmpty) + }).toList // mapping to a list (not a map) to keep the sorting } // Rewrite all closure callsites (or issue inliner warnings for those that cannot be rewritten) @@ -162,7 +158,7 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { isAccessible } - def pos = callGraph.callsites.get(invocation).map(_.callsitePosition).getOrElse(NoPosition) + def pos = callGraph.callsites(ownerMethod).get(invocation).map(_.callsitePosition).getOrElse(NoPosition) val stackSize: Either[RewriteClosureApplyToClosureBodyFailed, Int] = bodyAccessible match { case Left(w) => Left(RewriteClosureAccessCheckFailed(pos, w)) case Right(false) => Left(RewriteClosureIllegalAccess(pos, ownerClass.internalName)) @@ -210,8 +206,9 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { insertLoadOps(invocation, ownerMethod, argumentLocalsList) // update maxStack - val capturesStackSize = localsForCapturedValues.size - val invocationStackHeight = stackHeight + capturesStackSize - 1 // -1 because the closure is gone + // One slot per value is correct for long / double, see comment in the `analysis` package object. 
+ val numCapturedValues = localsForCapturedValues.locals.length + val invocationStackHeight = stackHeight + numCapturedValues - 1 // -1 because the closure is gone if (invocationStackHeight > ownerMethod.maxStack) ownerMethod.maxStack = invocationStackHeight @@ -237,26 +234,33 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { ownerMethod.instructions.remove(invocation) // update the call graph - val originalCallsite = callGraph.callsites.remove(invocation) + val originalCallsite = callGraph.removeCallsite(invocation, ownerMethod) // the method node is needed for building the call graph entry val bodyMethod = byteCodeRepository.methodNode(lambdaBodyHandle.getOwner, lambdaBodyHandle.getName, lambdaBodyHandle.getDesc) def bodyMethodIsBeingCompiled = byteCodeRepository.classNodeAndSource(lambdaBodyHandle.getOwner).map(_._2 == CompilationUnit).getOrElse(false) - val bodyMethodCallsite = Callsite( - callsiteInstruction = bodyInvocation, - callsiteMethod = ownerMethod, - callsiteClass = closureInit.ownerClass, - callee = bodyMethod.map({ - case (bodyMethodNode, bodyMethodDeclClass) => Callee( + val callee = bodyMethod.map({ + case (bodyMethodNode, bodyMethodDeclClass) => + val bodyDeclClassType = classBTypeFromParsedClassfile(bodyMethodDeclClass) + Callee( callee = bodyMethodNode, - calleeDeclarationClass = classBTypeFromParsedClassfile(bodyMethodDeclClass), + calleeDeclarationClass = bodyDeclClassType, safeToInline = compilerSettings.YoptInlineGlobal || bodyMethodIsBeingCompiled, safeToRewrite = false, // the lambda body method is not a trait interface method annotatedInline = false, annotatedNoInline = false, + samParamTypes = callGraph.samParamTypes(bodyMethodNode, bodyDeclClassType), calleeInfoWarning = None) - }), - argInfos = Nil, + }) + val argInfos = closureInit.capturedArgInfos ++ originalCallsite.map(cs => cs.argInfos map { + case (index, info) => (index + numCapturedValues, info) + }).getOrElse(IntMap.empty) + val bodyMethodCallsite = Callsite( + 
callsiteInstruction = bodyInvocation, + callsiteMethod = ownerMethod, + callsiteClass = closureInit.ownerClass, + callee = callee, + argInfos = argInfos, callsiteStackHeight = invocationStackHeight, receiverKnownNotNull = true, // see below (*) callsitePosition = originalCallsite.map(_.callsitePosition).getOrElse(NoPosition) @@ -266,7 +270,11 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { // (corresponding to the receiver) must be non-null" // Explanation: If the lambda body method is non-static, the receiver is a captured // value. It can only be captured within some instance method, so we know it's non-null. - callGraph.callsites(bodyInvocation) = bodyMethodCallsite + callGraph.addCallsite(bodyMethodCallsite) + + // Rewriting a closure invocation may render code unreachable. For example, the body method of + // (x: T) => ??? has return type Nothing$, and an ATHROW is added (see fixLoadedNothingOrNullValue). + unreachableCodeEliminated -= ownerMethod } /** diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/InlineInfoAttribute.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/InlineInfoAttribute.scala index e7dd5abc57..c885a29e16 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/InlineInfoAttribute.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/InlineInfoAttribute.scala @@ -47,15 +47,22 @@ case class InlineInfoAttribute(inlineInfo: InlineInfo) extends Attribute(InlineI result.putByte(InlineInfoAttribute.VERSION) - var hasSelfIsFinal = 0 - if (inlineInfo.isEffectivelyFinal) hasSelfIsFinal |= 1 - if (inlineInfo.traitImplClassSelfType.isDefined) hasSelfIsFinal |= 2 - result.putByte(hasSelfIsFinal) + var finalSelfSam = 0 + if (inlineInfo.isEffectivelyFinal) finalSelfSam |= 1 + if (inlineInfo.traitImplClassSelfType.isDefined) finalSelfSam |= 2 + if (inlineInfo.sam.isDefined) finalSelfSam |= 4 + result.putByte(finalSelfSam) for (selfInternalName <- inlineInfo.traitImplClassSelfType) { result.putShort(cw.newUTF8(selfInternalName)) } 
+ for (samNameDesc <- inlineInfo.sam) { + val (name, desc) = samNameDesc.span(_ != '(') + result.putShort(cw.newUTF8(name)) + result.putShort(cw.newUTF8(desc)) + } + // The method count fits in a short (the methods_count in a classfile is also a short) result.putShort(inlineInfo.methodInfos.size) @@ -94,15 +101,20 @@ case class InlineInfoAttribute(inlineInfo: InlineInfo) extends Attribute(InlineI val version = nextByte() if (version == 1) { - val hasSelfIsFinal = nextByte() - val isFinal = (hasSelfIsFinal & 1) != 0 - val hasSelf = (hasSelfIsFinal & 2) != 0 + val finalSelfSam = nextByte() + val isFinal = (finalSelfSam & 1) != 0 + val hasSelf = (finalSelfSam & 2) != 0 + val hasSam = (finalSelfSam & 4) != 0 - val self = if (hasSelf) { + val self = if (!hasSelf) None else { val selfName = nextUTF8() Some(selfName) - } else { - None + } + + val sam = if (!hasSam) None else { + val name = nextUTF8() + val desc = nextUTF8() + Some(name + desc) } val numEntries = nextShort() @@ -118,7 +130,7 @@ case class InlineInfoAttribute(inlineInfo: InlineInfo) extends Attribute(InlineI (name + desc, MethodInlineInfo(isFinal, traitMethodWithStaticImplementation, isInline, isNoInline)) }).toMap - InlineInfoAttribute(InlineInfo(self, isFinal, infos, None)) + InlineInfoAttribute(InlineInfo(self, isFinal, sam, infos, None)) } else { val msg = UnknownScalaInlineInfoVersion(cr.getClassName, version) InlineInfoAttribute(BTypes.EmptyInlineInfo.copy(warning = Some(msg))) @@ -129,8 +141,10 @@ case class InlineInfoAttribute(inlineInfo: InlineInfo) extends Attribute(InlineI object InlineInfoAttribute { /** * [u1] version - * [u1] isEffectivelyFinal (<< 0), hasTraitImplClassSelfType (<< 1) + * [u1] isEffectivelyFinal (<< 0), hasTraitImplClassSelfType (<< 1), hasSam (<< 2) * [u2]? traitImplClassSelfType (reference) + * [u2]? samName (reference) + * [u2]? 
samDescriptor (reference) * [u2] numMethodEntries * [u2] name (reference) * [u2] descriptor (reference) @@ -145,4 +159,4 @@ object InlineInfoAttribute { * In order to instruct the ASM framework to de-serialize the ScalaInlineInfo attribute, we need * to pass a prototype instance when running the class reader. */ -object InlineInfoAttributePrototype extends InlineInfoAttribute(InlineInfo(null, false, null, null)) +object InlineInfoAttributePrototype extends InlineInfoAttribute(InlineInfo(null, false, null, null, null)) diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala index 6b2786c1a3..baa747492f 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala @@ -9,7 +9,6 @@ package opt import scala.annotation.tailrec import scala.tools.asm -import asm.Handle import asm.Opcodes._ import asm.tree._ import scala.collection.convert.decorateAsScala._ @@ -17,18 +16,20 @@ import scala.collection.convert.decorateAsJava._ import AsmUtils._ import BytecodeUtils._ import collection.mutable -import scala.tools.asm.tree.analysis.SourceInterpreter +import scala.tools.asm.tree.analysis.{Analyzer, SourceInterpreter} import BackendReporting._ import scala.tools.nsc.backend.jvm.BTypes.InternalName class Inliner[BT <: BTypes](val btypes: BT) { import btypes._ import callGraph._ + import inlinerHeuristics._ + import analyzers._ def eliminateUnreachableCodeAndUpdateCallGraph(methodNode: MethodNode, definingClass: InternalName): Unit = { localOpt.minimalRemoveUnreachableCode(methodNode, definingClass) foreach { - case invocation: MethodInsnNode => callGraph.callsites.remove(invocation) - case indy: InvokeDynamicInsnNode => callGraph.closureInstantiations.remove(indy) + case invocation: MethodInsnNode => callGraph.removeCallsite(invocation, methodNode) + case indy: InvokeDynamicInsnNode => callGraph.removeClosureInstantiation(indy, 
methodNode) case _ => } } @@ -37,7 +38,8 @@ class Inliner[BT <: BTypes](val btypes: BT) { rewriteFinalTraitMethodInvocations() for (request <- collectAndOrderInlineRequests) { - val Right(callee) = request.callee // collectAndOrderInlineRequests returns callsites with a known callee + val callsite = request.callsite + val Right(callee) = callsite.callee // collectAndOrderInlineRequests returns callsites with a known callee // Inlining a method can create unreachable code. Example: // def f = throw e @@ -48,16 +50,14 @@ class Inliner[BT <: BTypes](val btypes: BT) { // DCE above removes unreachable callsites from the call graph. If the inlining request denotes // such an eliminated callsite, do nothing. - if (callGraph.callsites contains request.callsiteInstruction) { - val r = inline(request.callsiteInstruction, request.callsiteStackHeight, request.callsiteMethod, request.callsiteClass, - callee.callee, callee.calleeDeclarationClass, - request.receiverKnownNotNull, keepLineNumbers = false) + if (callGraph.callsites(callsite.callsiteMethod).contains(callsite.callsiteInstruction)) { + val warnings = inline(request) - for (warning <- r) { + for (warning <- warnings) { if ((callee.annotatedInline && btypes.compilerSettings.YoptWarningEmitAtInlineFailed) || warning.emitWarning(compilerSettings)) { val annotWarn = if (callee.annotatedInline) " is annotated @inline but" else "" val msg = s"${BackendReporting.methodSignature(callee.calleeDeclarationClass.internalName, callee.callee)}$annotWarn could not be inlined:\n$warning" - backendReporting.inlinerWarning(request.callsitePosition, msg) + backendReporting.inlinerWarning(callsite.callsitePosition, msg) } } } @@ -69,81 +69,36 @@ class Inliner[BT <: BTypes](val btypes: BT) { * - Always remove the same request when breaking inlining cycles * - Perform inlinings in a consistent order */ - object callsiteOrdering extends Ordering[Callsite] { - override def compare(x: Callsite, y: Callsite): Int = { - val cls = 
x.callsiteClass.internalName compareTo y.callsiteClass.internalName + object callsiteOrdering extends Ordering[InlineRequest] { + override def compare(x: InlineRequest, y: InlineRequest): Int = { + val xCs = x.callsite + val yCs = y.callsite + val cls = xCs.callsiteClass.internalName compareTo yCs.callsiteClass.internalName if (cls != 0) return cls - val name = x.callsiteMethod.name compareTo y.callsiteMethod.name + val name = xCs.callsiteMethod.name compareTo yCs.callsiteMethod.name if (name != 0) return name - val desc = x.callsiteMethod.desc compareTo y.callsiteMethod.desc + val desc = xCs.callsiteMethod.desc compareTo yCs.callsiteMethod.desc if (desc != 0) return desc def pos(c: Callsite) = c.callsiteMethod.instructions.indexOf(c.callsiteInstruction) - pos(x) - pos(y) + pos(xCs) - pos(yCs) } } - /** - * Select callsites from the call graph that should be inlined. The resulting list of inlining - * requests is allowed to have cycles, and the callsites can appear in any order. - */ - def selectCallsitesForInlining: List[Callsite] = { - callsites.valuesIterator.filter({ - case callsite @ Callsite(_, _, _, Right(Callee(callee, calleeDeclClass, safeToInline, _, annotatedInline, _, warning)), _, _, _, pos) => - val res = doInlineCallsite(callsite) - - if (!res) { - if (annotatedInline && btypes.compilerSettings.YoptWarningEmitAtInlineFailed) { - // if the callsite is annotated @inline, we report an inline warning even if the underlying - // reason is, for example, mixed compilation (which has a separate -Yopt-warning flag). - def initMsg = s"${BackendReporting.methodSignature(calleeDeclClass.internalName, callee)} is annotated @inline but cannot be inlined" - def warnMsg = warning.map(" Possible reason:\n" + _).getOrElse("") - if (doRewriteTraitCallsite(callsite)) - backendReporting.inlinerWarning(pos, s"$initMsg: the trait method call could not be rewritten to the static implementation method." 
+ warnMsg) - else if (!safeToInline) - backendReporting.inlinerWarning(pos, s"$initMsg: the method is not final and may be overridden." + warnMsg) - else - backendReporting.inlinerWarning(pos, s"$initMsg." + warnMsg) - } else if (warning.isDefined && warning.get.emitWarning(compilerSettings)) { - // when annotatedInline is false, and there is some warning, the callsite metadata is possibly incomplete. - backendReporting.inlinerWarning(pos, s"there was a problem determining if method ${callee.name} can be inlined: \n"+ warning.get) - } - } - - res - - case Callsite(ins, _, _, Left(warning), _, _, _, pos) => - if (warning.emitWarning(compilerSettings)) - backendReporting.inlinerWarning(pos, s"failed to determine if ${ins.name} should be inlined:\n$warning") - false - }).toList - } - - /** - * The current inlining heuristics are simple: inline calls to methods annotated @inline. - */ - def doInlineCallsite(callsite: Callsite): Boolean = callsite match { - case Callsite(_, _, _, Right(Callee(callee, calleeDeclClass, safeToInline, _, annotatedInline, _, warning)), _, _, _, pos) => - if (compilerSettings.YoptInlineHeuristics.value == "everything") safeToInline - else annotatedInline && safeToInline - - case _ => false - } - def rewriteFinalTraitMethodInvocations(): Unit = { // Rewriting final trait method callsites to the implementation class enables inlining. // We cannot just iterate over the values of the `callsites` map because the rewrite changes the // map. Therefore we first copy the values to a list. - callsites.values.toList.foreach(rewriteFinalTraitMethodInvocation) + callsites.valuesIterator.flatMap(_.valuesIterator).toList.foreach(rewriteFinalTraitMethodInvocation) } /** * True for statically resolved trait callsites that should be rewritten to the static implementation method. 
*/ def doRewriteTraitCallsite(callsite: Callsite) = callsite.callee match { - case Right(Callee(callee, calleeDeclarationClass, safeToInline, true, annotatedInline, annotatedNoInline, infoWarning)) => true + case Right(Callee(_, _, _, safeToRewrite, _, _, _, _)) => safeToRewrite case _ => false } @@ -156,7 +111,7 @@ class Inliner[BT <: BTypes](val btypes: BT) { */ def rewriteFinalTraitMethodInvocation(callsite: Callsite): Unit = { if (doRewriteTraitCallsite(callsite)) { - val Right(Callee(callee, calleeDeclarationClass, _, _, annotatedInline, annotatedNoInline, infoWarning)) = callsite.callee + val Right(Callee(callee, calleeDeclarationClass, _, _, annotatedInline, annotatedNoInline, samParamTypes, infoWarning)) = callsite.callee val traitMethodArgumentTypes = asm.Type.getArgumentTypes(callee.desc) @@ -188,9 +143,10 @@ class Inliner[BT <: BTypes](val btypes: BT) { // VerifyError. We run a `SourceInterpreter` to find all producer instructions of the // receiver value and add a cast to the self type after each. if (!selfTypeOk) { - // there's no need to run eliminateUnreachableCode here. building the call graph does that - // already, no code can become unreachable in the meantime. - val analyzer = new AsmAnalyzer(callsite.callsiteMethod, callsite.callsiteClass.internalName, new SourceInterpreter) + // We don't need to worry about the method being too large for running an analysis. + // Callsites of large methods are not added to the call graph. 
+ localOpt.minimalRemoveUnreachableCode(callsite.callsiteMethod, callsite.callsiteClass.internalName) + val analyzer = new AsmAnalyzer(callsite.callsiteMethod, callsite.callsiteClass.internalName, new Analyzer(new SourceInterpreter)) val receiverValue = analyzer.frameAt(callsite.callsiteInstruction).peekStack(traitMethodArgumentTypes.length) for (i <- receiverValue.insns.asScala) { val cast = new TypeInsnNode(CHECKCAST, selfParamType.internalName) @@ -202,7 +158,11 @@ class Inliner[BT <: BTypes](val btypes: BT) { callsite.callsiteMethod.instructions.insert(callsite.callsiteInstruction, newCallsiteInstruction) callsite.callsiteMethod.instructions.remove(callsite.callsiteInstruction) - callGraph.callsites.remove(callsite.callsiteInstruction) + callGraph.removeCallsite(callsite.callsiteInstruction, callsite.callsiteMethod) + val staticCallSamParamTypes = { + if (selfParamType.info.get.inlineInfo.sam.isEmpty) samParamTypes - 0 + else samParamTypes.updated(0, selfParamType) + } val staticCallsite = Callsite( callsiteInstruction = newCallsiteInstruction, callsiteMethod = callsite.callsiteMethod, @@ -214,19 +174,20 @@ class Inliner[BT <: BTypes](val btypes: BT) { safeToRewrite = false, annotatedInline = annotatedInline, annotatedNoInline = annotatedNoInline, + samParamTypes = staticCallSamParamTypes, calleeInfoWarning = infoWarning)), - argInfos = Nil, + argInfos = callsite.argInfos, callsiteStackHeight = callsite.callsiteStackHeight, receiverKnownNotNull = callsite.receiverKnownNotNull, callsitePosition = callsite.callsitePosition ) - callGraph.callsites(newCallsiteInstruction) = staticCallsite + callGraph.addCallsite(staticCallsite) } for (warning <- res.left) { val Right(callee) = callsite.callee val newCallee = callee.copy(calleeInfoWarning = Some(RewriteTraitCallToStaticImplMethodFailed(calleeDeclarationClass.internalName, callee.callee.name, callee.callee.desc, warning))) - callGraph.callsites(callsite.callsiteInstruction) = callsite.copy(callee = Right(newCallee)) 
+ callGraph.addCallsite(callsite.copy(callee = Right(newCallee))) } } } @@ -238,15 +199,11 @@ class Inliner[BT <: BTypes](val btypes: BT) { * The resulting list is sorted such that the leaves of the inline request graph are on the left. * Once these leaves are inlined, the successive elements will be leaves, etc. */ - private def collectAndOrderInlineRequests: List[Callsite] = { - val requests = selectCallsitesForInlining + private def collectAndOrderInlineRequests: List[InlineRequest] = { + val requestsByMethod = selectCallsitesForInlining withDefaultValue Set.empty - // This map is an index to look up the inlining requests for a method. The value sets are mutable - // to allow removing elided requests (to break inlining cycles). The map itself is mutable to - // allow efficient building: requests.groupBy would build values as List[Callsite] that need to - // be transformed to mutable sets. - val inlineRequestsForMethod: mutable.Map[MethodNode, mutable.Set[Callsite]] = mutable.HashMap.empty.withDefaultValue(mutable.HashSet.empty) - for (r <- requests) inlineRequestsForMethod.getOrElseUpdate(r.callsiteMethod, mutable.HashSet.empty) += r + val elided = mutable.Set.empty[InlineRequest] + def nonElidedRequests(methodNode: MethodNode): Set[InlineRequest] = requestsByMethod(methodNode) diff elided /** * Break cycles in the inline request graph by removing callsites. @@ -254,7 +211,7 @@ class Inliner[BT <: BTypes](val btypes: BT) { * The list `requests` is traversed left-to-right, removing those callsites that are part of a * cycle. Elided callsites are also removed from the `inlineRequestsForMethod` map. */ - def breakInlineCycles(requests: List[Callsite]): List[Callsite] = { + def breakInlineCycles: List[InlineRequest] = { // is there a path of inline requests from start to goal? 
def isReachable(start: MethodNode, goal: MethodNode): Boolean = { @tailrec def reachableImpl(check: List[MethodNode], visited: Set[MethodNode]): Boolean = check match { @@ -262,7 +219,7 @@ class Inliner[BT <: BTypes](val btypes: BT) { if (x == goal) true else if (visited(x)) reachableImpl(xs, visited) else { - val callees = inlineRequestsForMethod(x).map(_.callee.get.callee) + val callees = nonElidedRequests(x).map(_.callsite.callee.get.callee) reachableImpl(xs ::: callees.toList, visited + x) } @@ -272,12 +229,14 @@ class Inliner[BT <: BTypes](val btypes: BT) { reachableImpl(List(start), Set.empty) } - val result = new mutable.ListBuffer[Callsite]() + val result = new mutable.ListBuffer[InlineRequest]() + val requests = requestsByMethod.valuesIterator.flatten.toArray // sort the inline requests to ensure that removing requests is deterministic - for (r <- requests.sorted(callsiteOrdering)) { + java.util.Arrays.sort(requests, callsiteOrdering) + for (r <- requests) { // is there a chain of inlining requests that would inline the callsite method into the callee? - if (isReachable(r.callee.get.callee, r.callsiteMethod)) - inlineRequestsForMethod(r.callsiteMethod) -= r + if (isReachable(r.callsite.callee.get.callee, r.callsite.callsiteMethod)) + elided += r else result += r } @@ -286,11 +245,11 @@ class Inliner[BT <: BTypes](val btypes: BT) { // sort the remaining inline requests such that the leaves appear first, then those requests // that become leaves, etc. 
- def leavesFirst(requests: List[Callsite], visited: Set[Callsite] = Set.empty): List[Callsite] = { + def leavesFirst(requests: List[InlineRequest], visited: Set[InlineRequest] = Set.empty): List[InlineRequest] = { if (requests.isEmpty) Nil else { val (leaves, others) = requests.partition(r => { - val inlineRequestsForCallee = inlineRequestsForMethod(r.callee.get.callee) + val inlineRequestsForCallee = nonElidedRequests(r.callsite.callee.get.callee) inlineRequestsForCallee.forall(visited) }) assert(leaves.nonEmpty, requests) @@ -298,192 +257,232 @@ class Inliner[BT <: BTypes](val btypes: BT) { } } - leavesFirst(breakInlineCycles(requests)) + leavesFirst(breakInlineCycles) } + /** + * Inline the callsites of an inlining request and its post-inlining requests. + * + * @return An inliner warning for each callsite that could not be inlined. + */ + def inline(request: InlineRequest): List[CannotInlineWarning] = canInline(request.callsite) match { + case Some(warning) => List(warning) + case None => + val instructionsMap = inlineCallsite(request.callsite) + val postRequests = request.post.flatMap(post => { + // the post-request invocation instruction might not exist anymore: it might have been + // inlined itself, or eliminated by DCE. + for { + inlinedInvocationInstr <- instructionsMap.get(post.callsiteInstruction).map(_.asInstanceOf[MethodInsnNode]) + inlinedCallsite <- callGraph.callsites(request.callsite.callsiteMethod).get(inlinedInvocationInstr) + } yield InlineRequest(inlinedCallsite, post.post) + }) + postRequests flatMap inline + } /** * Copy and adapt the instructions of a method to a callsite. 
* * Preconditions: + * - The callsite can safely be inlined (canInline is true) * - The maxLocals and maxStack values of the callsite method are correctly computed - * - The callsite method contains no unreachable basic blocks, i.e., running an [[Analyzer]] - * does not produce any `null` frames + * - The callsite method contains no unreachable basic blocks, i.e., running an Analyzer does + * not produce any `null` frames * - * @param callsiteInstruction The invocation instruction - * @param callsiteStackHeight The stack height at the callsite - * @param callsiteMethod The method in which the invocation occurs - * @param callsiteClass The class in which the callsite method is defined - * @param callee The invoked method - * @param calleeDeclarationClass The class in which the invoked method is defined - * @param receiverKnownNotNull `true` if the receiver is known to be non-null - * @param keepLineNumbers `true` if LineNumberNodes should be copied to the call site - * @return `Some(message)` if inlining cannot be performed, `None` otherwise + * @return A map associating instruction nodes of the callee with the corresponding cloned + * instruction in the callsite method. 
*/ - def inline(callsiteInstruction: MethodInsnNode, callsiteStackHeight: Int, callsiteMethod: MethodNode, callsiteClass: ClassBType, - callee: MethodNode, calleeDeclarationClass: ClassBType, - receiverKnownNotNull: Boolean, keepLineNumbers: Boolean): Option[CannotInlineWarning] = { - canInline(callsiteInstruction, callsiteStackHeight, callsiteMethod, callsiteClass, callee, calleeDeclarationClass) orElse { - // New labels for the cloned instructions - val labelsMap = cloneLabels(callee) - val (clonedInstructions, instructionMap) = cloneInstructions(callee, labelsMap) - if (!keepLineNumbers) { - removeLineNumberNodes(clonedInstructions) - } + def inlineCallsite(callsite: Callsite): Map[AbstractInsnNode, AbstractInsnNode] = { + import callsite.{callsiteClass, callsiteMethod, callsiteInstruction, receiverKnownNotNull, callsiteStackHeight} + val Right(callsiteCallee) = callsite.callee + import callsiteCallee.{callee, calleeDeclarationClass} + + // New labels for the cloned instructions + val labelsMap = cloneLabels(callee) + val (clonedInstructions, instructionMap) = cloneInstructions(callee, labelsMap) + val keepLineNumbers = callsiteClass == calleeDeclarationClass + if (!keepLineNumbers) { + removeLineNumberNodes(clonedInstructions) + } - // local vars in the callee are shifted by the number of locals at the callsite - val localVarShift = callsiteMethod.maxLocals - clonedInstructions.iterator.asScala foreach { - case varInstruction: VarInsnNode => varInstruction.`var` += localVarShift - case iinc: IincInsnNode => iinc.`var` += localVarShift - case _ => () - } + // local vars in the callee are shifted by the number of locals at the callsite + val localVarShift = callsiteMethod.maxLocals + clonedInstructions.iterator.asScala foreach { + case varInstruction: VarInsnNode => varInstruction.`var` += localVarShift + case iinc: IincInsnNode => iinc.`var` += localVarShift + case _ => () + } - // add a STORE instruction for each expected argument, including for THIS instance 
if any - val argStores = new InsnList - var nextLocalIndex = callsiteMethod.maxLocals - if (!isStaticMethod(callee)) { - if (!receiverKnownNotNull) { - argStores.add(new InsnNode(DUP)) - val nonNullLabel = newLabelNode - argStores.add(new JumpInsnNode(IFNONNULL, nonNullLabel)) - argStores.add(new InsnNode(ACONST_NULL)) - argStores.add(new InsnNode(ATHROW)) - argStores.add(nonNullLabel) - } - argStores.add(new VarInsnNode(ASTORE, nextLocalIndex)) - nextLocalIndex += 1 + // add a STORE instruction for each expected argument, including for THIS instance if any + val argStores = new InsnList + var nextLocalIndex = callsiteMethod.maxLocals + if (!isStaticMethod(callee)) { + if (!receiverKnownNotNull) { + argStores.add(new InsnNode(DUP)) + val nonNullLabel = newLabelNode + argStores.add(new JumpInsnNode(IFNONNULL, nonNullLabel)) + argStores.add(new InsnNode(ACONST_NULL)) + argStores.add(new InsnNode(ATHROW)) + argStores.add(nonNullLabel) } + argStores.add(new VarInsnNode(ASTORE, nextLocalIndex)) + nextLocalIndex += 1 + } - // We just use an asm.Type here, no need to create the MethodBType. - val calleAsmType = asm.Type.getMethodType(callee.desc) + // We just use an asm.Type here, no need to create the MethodBType. 
+ val calleAsmType = asm.Type.getMethodType(callee.desc) + val calleeParamTypes = calleAsmType.getArgumentTypes - for(argTp <- calleAsmType.getArgumentTypes) { - val opc = argTp.getOpcode(ISTORE) // returns the correct xSTORE instruction for argTp - argStores.insert(new VarInsnNode(opc, nextLocalIndex)) // "insert" is "prepend" - the last argument is on the top of the stack - nextLocalIndex += argTp.getSize - } + for(argTp <- calleeParamTypes) { + val opc = argTp.getOpcode(ISTORE) // returns the correct xSTORE instruction for argTp + argStores.insert(new VarInsnNode(opc, nextLocalIndex)) // "insert" is "prepend" - the last argument is on the top of the stack + nextLocalIndex += argTp.getSize + } - clonedInstructions.insert(argStores) - - // label for the exit of the inlined functions. xRETURNs are replaced by GOTOs to this label. - val postCallLabel = newLabelNode - clonedInstructions.add(postCallLabel) - - // replace xRETURNs: - // - store the return value (if any) - // - clear the stack of the inlined method (insert DROPs) - // - load the return value - // - GOTO postCallLabel - - val returnType = calleAsmType.getReturnType - val hasReturnValue = returnType.getSort != asm.Type.VOID - val returnValueIndex = callsiteMethod.maxLocals + callee.maxLocals - nextLocalIndex += returnType.getSize - - def returnValueStore(returnInstruction: AbstractInsnNode) = { - val opc = returnInstruction.getOpcode match { - case IRETURN => ISTORE - case LRETURN => LSTORE - case FRETURN => FSTORE - case DRETURN => DSTORE - case ARETURN => ASTORE - } - new VarInsnNode(opc, returnValueIndex) + clonedInstructions.insert(argStores) + + // label for the exit of the inlined functions. xRETURNs are replaced by GOTOs to this label. 
+ val postCallLabel = newLabelNode + clonedInstructions.add(postCallLabel) + + // replace xRETURNs: + // - store the return value (if any) + // - clear the stack of the inlined method (insert DROPs) + // - load the return value + // - GOTO postCallLabel + + val returnType = calleAsmType.getReturnType + val hasReturnValue = returnType.getSort != asm.Type.VOID + val returnValueIndex = callsiteMethod.maxLocals + callee.maxLocals + nextLocalIndex += returnType.getSize + + def returnValueStore(returnInstruction: AbstractInsnNode) = { + val opc = returnInstruction.getOpcode match { + case IRETURN => ISTORE + case LRETURN => LSTORE + case FRETURN => FSTORE + case DRETURN => DSTORE + case ARETURN => ASTORE } + new VarInsnNode(opc, returnValueIndex) + } - // We run an interpreter to know the stack height at each xRETURN instruction and the sizes - // of the values on the stack. - val analyzer = new AsmAnalyzer(callee, calleeDeclarationClass.internalName) + // We run an interpreter to know the stack height at each xRETURN instruction and the sizes + // of the values on the stack. + // We don't need to worry about the method being too large for running an analysis. Callsites of + // large methods are not added to the call graph. 
+ val analyzer = new AsmAnalyzer(callee, calleeDeclarationClass.internalName) - for (originalReturn <- callee.instructions.iterator().asScala if isReturn(originalReturn)) { - val frame = analyzer.frameAt(originalReturn) - var stackHeight = frame.getStackSize + for (originalReturn <- callee.instructions.iterator().asScala if isReturn(originalReturn)) { + val frame = analyzer.frameAt(originalReturn) + var stackHeight = frame.getStackSize - val inlinedReturn = instructionMap(originalReturn) - val returnReplacement = new InsnList + val inlinedReturn = instructionMap(originalReturn) + val returnReplacement = new InsnList - def drop(slot: Int) = returnReplacement add getPop(frame.peekStack(slot).getSize) + def drop(slot: Int) = returnReplacement add getPop(frame.peekStack(slot).getSize) - // for non-void methods, store the stack top into the return local variable - if (hasReturnValue) { - returnReplacement add returnValueStore(originalReturn) - stackHeight -= 1 - } + // for non-void methods, store the stack top into the return local variable + if (hasReturnValue) { + returnReplacement add returnValueStore(originalReturn) + stackHeight -= 1 + } - // drop the rest of the stack - for (i <- 0 until stackHeight) drop(i) + // drop the rest of the stack + for (i <- 0 until stackHeight) drop(i) - returnReplacement add new JumpInsnNode(GOTO, postCallLabel) - clonedInstructions.insert(inlinedReturn, returnReplacement) - clonedInstructions.remove(inlinedReturn) - } + returnReplacement add new JumpInsnNode(GOTO, postCallLabel) + clonedInstructions.insert(inlinedReturn, returnReplacement) + clonedInstructions.remove(inlinedReturn) + } - // Load instruction for the return value - if (hasReturnValue) { - val retVarLoad = { - val opc = returnType.getOpcode(ILOAD) - new VarInsnNode(opc, returnValueIndex) - } - clonedInstructions.insert(postCallLabel, retVarLoad) + // Load instruction for the return value + if (hasReturnValue) { + val retVarLoad = { + val opc = returnType.getOpcode(ILOAD) 
+ new VarInsnNode(opc, returnValueIndex) } + clonedInstructions.insert(postCallLabel, retVarLoad) + } - callsiteMethod.instructions.insert(callsiteInstruction, clonedInstructions) - callsiteMethod.instructions.remove(callsiteInstruction) - - callsiteMethod.localVariables.addAll(cloneLocalVariableNodes(callee, labelsMap, callee.name + "_").asJava) - callsiteMethod.tryCatchBlocks.addAll(cloneTryCatchBlockNodes(callee, labelsMap).asJava) - - // Add all invocation instructions and closure instantiations that were inlined to the call graph - callee.instructions.iterator().asScala foreach { - case originalCallsiteIns: MethodInsnNode => - callGraph.callsites.get(originalCallsiteIns) match { - case Some(originalCallsite) => - val newCallsiteIns = instructionMap(originalCallsiteIns).asInstanceOf[MethodInsnNode] - callGraph.callsites(newCallsiteIns) = Callsite( - callsiteInstruction = newCallsiteIns, - callsiteMethod = callsiteMethod, - callsiteClass = callsiteClass, - callee = originalCallsite.callee, - argInfos = Nil, // TODO: re-compute argInfos for new destination (once we actually compute them) - callsiteStackHeight = callsiteStackHeight + originalCallsite.callsiteStackHeight, - receiverKnownNotNull = originalCallsite.receiverKnownNotNull, - callsitePosition = originalCallsite.callsitePosition - ) - - case None => - } + callsiteMethod.instructions.insert(callsiteInstruction, clonedInstructions) + callsiteMethod.instructions.remove(callsiteInstruction) + + callsiteMethod.localVariables.addAll(cloneLocalVariableNodes(callee, labelsMap, callee.name + "_", localVarShift).asJava) + // prepend the handlers of the callee. the order of handlers matters: when an exception is thrown + // at some instruction, the first handler guarding that instruction and having a matching exception + // type is executed. prepending the callee's handlers makes sure to test those handlers first if + // an exception is thrown in the inlined code. 
+ callsiteMethod.tryCatchBlocks.addAll(0, cloneTryCatchBlockNodes(callee, labelsMap).asJava) + + callsiteMethod.maxLocals += returnType.getSize + callee.maxLocals + val maxStackOfInlinedCode = { + // One slot per value is correct for long / double, see comment in the `analysis` package object. + val numStoredArgs = calleeParamTypes.length + (if (isStaticMethod(callee)) 0 else 1) + callee.maxStack + callsiteStackHeight - numStoredArgs + } + val stackHeightAtNullCheck = { + // When adding a null check for the receiver, a DUP is inserted, which might cause a new maxStack. + // If the callsite has other argument values than the receiver on the stack, these are pop'ed + // and stored into locals before the null check, so in that case the maxStack doesn't grow. + val stackSlotForNullCheck = if (!isStaticMethod(callee) && !receiverKnownNotNull && calleeParamTypes.isEmpty) 1 else 0 + callsiteStackHeight + stackSlotForNullCheck + } - case indy: InvokeDynamicInsnNode => - callGraph.closureInstantiations.get(indy) match { - case Some(closureInit) => - val newIndy = instructionMap(indy).asInstanceOf[InvokeDynamicInsnNode] - callGraph.closureInstantiations(newIndy) = ClosureInstantiation(closureInit.lambdaMetaFactoryCall.copy(indy = newIndy), callsiteMethod, callsiteClass) + callsiteMethod.maxStack = math.max(callsiteMethod.maxStack, math.max(stackHeightAtNullCheck, maxStackOfInlinedCode)) - case None => - } + callGraph.addIfMissing(callee, calleeDeclarationClass) - case _ => - } - // Remove the elided invocation from the call graph - callGraph.callsites.remove(callsiteInstruction) - - // Inlining a method body can render some code unreachable, see example above (in runInliner). 
- unreachableCodeEliminated -= callsiteMethod + def mapArgInfo(argInfo: (Int, ArgInfo)): Option[(Int, ArgInfo)] = argInfo match { + case lit @ (_, FunctionLiteral) => Some(lit) + case (argIndex, ForwardedParam(paramIndex)) => callsite.argInfos.get(paramIndex).map((argIndex, _)) + } - callsiteMethod.maxLocals += returnType.getSize + callee.maxLocals - callsiteMethod.maxStack = math.max(callsiteMethod.maxStack, callee.maxStack + callsiteStackHeight) + // Add all invocation instructions and closure instantiations that were inlined to the call graph + callGraph.callsites(callee).valuesIterator foreach { originalCallsite => + val newCallsiteIns = instructionMap(originalCallsite.callsiteInstruction).asInstanceOf[MethodInsnNode] + val argInfos = originalCallsite.argInfos flatMap mapArgInfo + callGraph.addCallsite(Callsite( + callsiteInstruction = newCallsiteIns, + callsiteMethod = callsiteMethod, + callsiteClass = callsiteClass, + callee = originalCallsite.callee, + argInfos = argInfos, + callsiteStackHeight = callsiteStackHeight + originalCallsite.callsiteStackHeight, + receiverKnownNotNull = originalCallsite.receiverKnownNotNull, + callsitePosition = originalCallsite.callsitePosition + )) + } - None + callGraph.closureInstantiations(callee).valuesIterator foreach { originalClosureInit => + val newIndy = instructionMap(originalClosureInit.lambdaMetaFactoryCall.indy).asInstanceOf[InvokeDynamicInsnNode] + val capturedArgInfos = originalClosureInit.capturedArgInfos flatMap mapArgInfo + callGraph.addClosureInstantiation( + ClosureInstantiation( + originalClosureInit.lambdaMetaFactoryCall.copy(indy = newIndy), + callsiteMethod, + callsiteClass, + capturedArgInfos) + ) } + + // Remove the elided invocation from the call graph + callGraph.removeCallsite(callsiteInstruction, callsiteMethod) + + // Inlining a method body can render some code unreachable, see example above (in runInliner). 
+ unreachableCodeEliminated -= callsiteMethod + + instructionMap } /** - * Check whether an inling can be performed. Parmeters are described in method [[inline]]. + * Check whether an inling can be performed. * @return `Some(message)` if inlining cannot be performed, `None` otherwise */ - def canInline(callsiteInstruction: MethodInsnNode, callsiteStackHeight: Int, callsiteMethod: MethodNode, callsiteClass: ClassBType, - callee: MethodNode, calleeDeclarationClass: ClassBType): Option[CannotInlineWarning] = { + def canInline(callsite: Callsite): Option[CannotInlineWarning] = { + import callsite.{callsiteInstruction, callsiteMethod, callsiteClass, callsiteStackHeight} + val Right(callsiteCallee) = callsite.callee + import callsiteCallee.{callee, calleeDeclarationClass} def calleeDesc = s"${callee.name} of type ${callee.desc} in ${calleeDeclarationClass.internalName}" def methodMismatch = s"Wrong method node for inlining ${textify(callsiteInstruction)}: $calleeDesc" diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala new file mode 100644 index 0000000000..e559b63c09 --- /dev/null +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala @@ -0,0 +1,230 @@ +/* NSC -- new Scala compiler + * Copyright 2005-2014 LAMP/EPFL + * @author Martin Odersky + */ + +package scala.tools.nsc +package backend.jvm +package opt + +import scala.collection.immutable.IntMap +import scala.tools.asm.Type +import scala.tools.asm.tree.{MethodNode, MethodInsnNode} +import scala.tools.nsc.backend.jvm.BTypes.InternalName +import scala.collection.convert.decorateAsScala._ + +class InlinerHeuristics[BT <: BTypes](val bTypes: BT) { + import bTypes._ + import inliner._ + import callGraph._ + + case class InlineRequest(callsite: Callsite, post: List[PostInlineRequest]) + case class PostInlineRequest(callsiteInstruction: MethodInsnNode, post: List[PostInlineRequest]) + + /** + * Select 
callsites from the call graph that should be inlined, grouped by the containing method. + * Cyclic inlining requests are allowed, the inliner will eliminate requests to break cycles. + */ + def selectCallsitesForInlining: Map[MethodNode, Set[InlineRequest]] = { + // We should only create inlining requests for callsites being compiled (not for callsites in + // classes on the classpath). The call graph may contain callsites of classes parsed from the + // classpath. In order to get only the callsites being compiled, we start at the map of + // compilingClasses in the byteCodeRepository. + val compilingMethods = for { + classNode <- byteCodeRepository.compilingClasses.valuesIterator + methodNode <- classNode.methods.iterator.asScala + } yield methodNode + + compilingMethods.map(methodNode => { + var requests = Set.empty[InlineRequest] + callGraph.callsites(methodNode).valuesIterator foreach { + case callsite @ Callsite(_, _, _, Right(Callee(callee, calleeDeclClass, safeToInline, _, annotatedInline, _, _, warning)), _, _, _, pos) => + val request = inlineRequest(callsite) + requests ++= request + if (request.isEmpty) { + if (annotatedInline && bTypes.compilerSettings.YoptWarningEmitAtInlineFailed) { + // if the callsite is annotated @inline, we report an inline warning even if the underlying + // reason is, for example, mixed compilation (which has a separate -Yopt-warning flag). + def initMsg = s"${BackendReporting.methodSignature(calleeDeclClass.internalName, callee)} is annotated @inline but cannot be inlined" + def warnMsg = warning.map(" Possible reason:\n" + _).getOrElse("") + if (doRewriteTraitCallsite(callsite)) + backendReporting.inlinerWarning(pos, s"$initMsg: the trait method call could not be rewritten to the static implementation method." + warnMsg) + else if (!safeToInline) + backendReporting.inlinerWarning(pos, s"$initMsg: the method is not final and may be overridden." + warnMsg) + else + backendReporting.inlinerWarning(pos, s"$initMsg." 
+ warnMsg) + } else if (warning.isDefined && warning.get.emitWarning(compilerSettings)) { + // when annotatedInline is false, and there is some warning, the callsite metadata is possibly incomplete. + backendReporting.inlinerWarning(pos, s"there was a problem determining if method ${callee.name} can be inlined: \n"+ warning.get) + } + } + + case Callsite(ins, _, _, Left(warning), _, _, _, pos) => + if (warning.emitWarning(compilerSettings)) + backendReporting.inlinerWarning(pos, s"failed to determine if ${ins.name} should be inlined:\n$warning") + } + (methodNode, requests) + }).filterNot(_._2.isEmpty).toMap + } + + /** + * Returns the inline request for a callsite if the callsite should be inlined according to the + * current heuristics (`-Yopt-inline-heuristics`). + * + * The resulting inline request may contain post-inlining requests of callsites that in turn are + * also selected as individual inlining requests. + */ + def inlineRequest(callsite: Callsite): Option[InlineRequest] = compilerSettings.YoptInlineHeuristics.value match { + case "everything" => + if (callsite.callee.get.safeToInline) Some(InlineRequest(callsite, Nil)) + else None + + case "at-inline-annotated" => + val callee = callsite.callee.get + if (callee.safeToInline && callee.annotatedInline) Some(InlineRequest(callsite, Nil)) + else None + + case "default" => + val callee = callsite.callee.get + if (callee.safeToInline && !callee.annotatedNoInline) { + val shouldInlineHO = callee.samParamTypes.nonEmpty && (callee.samParamTypes exists { + case (index, _) => callsite.argInfos.contains(index) + }) + + if (shouldInlineHO || callee.annotatedInline) Some(InlineRequest(callsite, Nil)) + else None + } else None + } + + /* + // using http://lihaoyi.github.io/Ammonite/ + + load.ivy("com.google.guava" % "guava" % "18.0") + val javaUtilFunctionClasses = { + val rt = System.getProperty("sun.boot.class.path").split(":").find(_.endsWith("lib/rt.jar")).get + val u = new java.io.File(rt).toURL + val l = new 
java.net.URLClassLoader(Array(u)) + val cp = com.google.common.reflect.ClassPath.from(l) + cp.getTopLevelClasses("java.util.function").toArray.map(_.toString).toList + } + + // found using IntelliJ's "Find Usages" on the @FunctionalInterface annotation + val otherClasses = List( + "com.sun.javafx.css.parser.Recognizer", + "java.awt.KeyEventDispatcher", + "java.awt.KeyEventPostProcessor", + "java.io.FileFilter", + "java.io.FilenameFilter", + "java.lang.Runnable", + "java.lang.Thread$UncaughtExceptionHandler", + "java.nio.file.DirectoryStream$Filter", + "java.nio.file.PathMatcher", + "java.time.temporal.TemporalAdjuster", + "java.time.temporal.TemporalQuery", + "java.util.Comparator", + "java.util.concurrent.Callable", + "java.util.logging.Filter", + "java.util.prefs.PreferenceChangeListener", + "javafx.animation.Interpolatable", + "javafx.beans.InvalidationListener", + "javafx.beans.value.ChangeListener", + "javafx.collections.ListChangeListener", + "javafx.collections.MapChangeListener", + "javafx.collections.SetChangeListener", + "javafx.event.EventHandler", + "javafx.util.Builder", + "javafx.util.BuilderFactory", + "javafx.util.Callback" + ) + + val allClasses = javaUtilFunctionClasses ::: otherClasses + + load.ivy("org.ow2.asm" % "asm" % "5.0.4") + val classesAndSamNameDesc = allClasses.map(c => { + val cls = Class.forName(c) + val internalName = org.objectweb.asm.Type.getDescriptor(cls).drop(1).dropRight(1) // drop L and ; + val sams = cls.getMethods.filter(m => { + (m.getModifiers & java.lang.reflect.Modifier.ABSTRACT) != 0 && + m.getName != "equals" // Comparator has an abstract override of "equals" for adding Javadoc + }) + assert(sams.size == 1, internalName + sams.map(_.getName)) + val sam = sams.head + val samDesc = org.objectweb.asm.Type.getMethodDescriptor(sam) + (internalName, sam.getName, samDesc) + }) + println(classesAndSamNameDesc map { + case (cls, nme, desc) => s"""("$cls", "$nme$desc")""" + } mkString ("", ",\n", "\n")) + */ + private val 
javaSams: Map[String, String] = Map( + ("java/util/function/BiConsumer", "accept(Ljava/lang/Object;Ljava/lang/Object;)V"), + ("java/util/function/BiFunction", "apply(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;"), + ("java/util/function/BiPredicate", "test(Ljava/lang/Object;Ljava/lang/Object;)Z"), + ("java/util/function/BinaryOperator", "apply(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;"), + ("java/util/function/BooleanSupplier", "getAsBoolean()Z"), + ("java/util/function/Consumer", "accept(Ljava/lang/Object;)V"), + ("java/util/function/DoubleBinaryOperator", "applyAsDouble(DD)D"), + ("java/util/function/DoubleConsumer", "accept(D)V"), + ("java/util/function/DoubleFunction", "apply(D)Ljava/lang/Object;"), + ("java/util/function/DoublePredicate", "test(D)Z"), + ("java/util/function/DoubleSupplier", "getAsDouble()D"), + ("java/util/function/DoubleToIntFunction", "applyAsInt(D)I"), + ("java/util/function/DoubleToLongFunction", "applyAsLong(D)J"), + ("java/util/function/DoubleUnaryOperator", "applyAsDouble(D)D"), + ("java/util/function/Function", "apply(Ljava/lang/Object;)Ljava/lang/Object;"), + ("java/util/function/IntBinaryOperator", "applyAsInt(II)I"), + ("java/util/function/IntConsumer", "accept(I)V"), + ("java/util/function/IntFunction", "apply(I)Ljava/lang/Object;"), + ("java/util/function/IntPredicate", "test(I)Z"), + ("java/util/function/IntSupplier", "getAsInt()I"), + ("java/util/function/IntToDoubleFunction", "applyAsDouble(I)D"), + ("java/util/function/IntToLongFunction", "applyAsLong(I)J"), + ("java/util/function/IntUnaryOperator", "applyAsInt(I)I"), + ("java/util/function/LongBinaryOperator", "applyAsLong(JJ)J"), + ("java/util/function/LongConsumer", "accept(J)V"), + ("java/util/function/LongFunction", "apply(J)Ljava/lang/Object;"), + ("java/util/function/LongPredicate", "test(J)Z"), + ("java/util/function/LongSupplier", "getAsLong()J"), + ("java/util/function/LongToDoubleFunction", "applyAsDouble(J)D"), + 
("java/util/function/LongToIntFunction", "applyAsInt(J)I"), + ("java/util/function/LongUnaryOperator", "applyAsLong(J)J"), + ("java/util/function/ObjDoubleConsumer", "accept(Ljava/lang/Object;D)V"), + ("java/util/function/ObjIntConsumer", "accept(Ljava/lang/Object;I)V"), + ("java/util/function/ObjLongConsumer", "accept(Ljava/lang/Object;J)V"), + ("java/util/function/Predicate", "test(Ljava/lang/Object;)Z"), + ("java/util/function/Supplier", "get()Ljava/lang/Object;"), + ("java/util/function/ToDoubleBiFunction", "applyAsDouble(Ljava/lang/Object;Ljava/lang/Object;)D"), + ("java/util/function/ToDoubleFunction", "applyAsDouble(Ljava/lang/Object;)D"), + ("java/util/function/ToIntBiFunction", "applyAsInt(Ljava/lang/Object;Ljava/lang/Object;)I"), + ("java/util/function/ToIntFunction", "applyAsInt(Ljava/lang/Object;)I"), + ("java/util/function/ToLongBiFunction", "applyAsLong(Ljava/lang/Object;Ljava/lang/Object;)J"), + ("java/util/function/ToLongFunction", "applyAsLong(Ljava/lang/Object;)J"), + ("java/util/function/UnaryOperator", "apply(Ljava/lang/Object;)Ljava/lang/Object;"), + ("com/sun/javafx/css/parser/Recognizer", "recognize(I)Z"), + ("java/awt/KeyEventDispatcher", "dispatchKeyEvent(Ljava/awt/event/KeyEvent;)Z"), + ("java/awt/KeyEventPostProcessor", "postProcessKeyEvent(Ljava/awt/event/KeyEvent;)Z"), + ("java/io/FileFilter", "accept(Ljava/io/File;)Z"), + ("java/io/FilenameFilter", "accept(Ljava/io/File;Ljava/lang/String;)Z"), + ("java/lang/Runnable", "run()V"), + ("java/lang/Thread$UncaughtExceptionHandler", "uncaughtException(Ljava/lang/Thread;Ljava/lang/Throwable;)V"), + ("java/nio/file/DirectoryStream$Filter", "accept(Ljava/lang/Object;)Z"), + ("java/nio/file/PathMatcher", "matches(Ljava/nio/file/Path;)Z"), + ("java/time/temporal/TemporalAdjuster", "adjustInto(Ljava/time/temporal/Temporal;)Ljava/time/temporal/Temporal;"), + ("java/time/temporal/TemporalQuery", "queryFrom(Ljava/time/temporal/TemporalAccessor;)Ljava/lang/Object;"), + ("java/util/Comparator", 
"compare(Ljava/lang/Object;Ljava/lang/Object;)I"), + ("java/util/concurrent/Callable", "call()Ljava/lang/Object;"), + ("java/util/logging/Filter", "isLoggable(Ljava/util/logging/LogRecord;)Z"), + ("java/util/prefs/PreferenceChangeListener", "preferenceChange(Ljava/util/prefs/PreferenceChangeEvent;)V"), + ("javafx/animation/Interpolatable", "interpolate(Ljava/lang/Object;D)Ljava/lang/Object;"), + ("javafx/beans/InvalidationListener", "invalidated(Ljavafx/beans/Observable;)V"), + ("javafx/beans/value/ChangeListener", "changed(Ljavafx/beans/value/ObservableValue;Ljava/lang/Object;Ljava/lang/Object;)V"), + ("javafx/collections/ListChangeListener", "onChanged(Ljavafx/collections/ListChangeListener$Change;)V"), + ("javafx/collections/MapChangeListener", "onChanged(Ljavafx/collections/MapChangeListener$Change;)V"), + ("javafx/collections/SetChangeListener", "onChanged(Ljavafx/collections/SetChangeListener$Change;)V"), + ("javafx/event/EventHandler", "handle(Ljavafx/event/Event;)V"), + ("javafx/util/Builder", "build()Ljava/lang/Object;"), + ("javafx/util/BuilderFactory", "getBuilder(Ljava/lang/Class;)Ljavafx/util/Builder;"), + ("javafx/util/Callback", "call(Ljava/lang/Object;)Ljava/lang/Object;") + ) + def javaSam(internalName: InternalName): Option[String] = javaSams.get(internalName) +} diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala index 4132710a96..1e7b46012e 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala @@ -8,8 +8,7 @@ package backend.jvm package opt import scala.annotation.switch -import scala.tools.asm.Opcodes -import scala.tools.asm.tree.analysis.{Analyzer, BasicInterpreter} +import scala.tools.asm.{Type, ClassWriter, MethodWriter, Opcodes} import scala.tools.asm.tree._ import scala.collection.convert.decorateAsScala._ import scala.tools.nsc.backend.jvm.BTypes.InternalName @@ -49,19 
+48,53 @@ import scala.tools.nsc.backend.jvm.opt.BytecodeUtils._ class LocalOpt[BT <: BTypes](val btypes: BT) { import LocalOptImpls._ import btypes._ + import analyzers._ + + /** + * In order to run an Analyzer, the maxLocals / maxStack fields need to be available. The ASM + * framework only computes these values during bytecode generation. + * + * Since there's currently no better way, we run a bytecode generator on the method and extract + * the computed values. This required changes to the ASM codebase: + * - the [[MethodWriter]] class was made public + * - accessors for maxLocals / maxStack were added to the MethodWriter class + * + * We could probably make this faster (and allocate less memory) by hacking the ASM framework + * more: create a subclass of MethodWriter with a /dev/null byteVector. Another option would be + * to create a separate visitor for computing those values, duplicating the functionality from the + * MethodWriter. + * + * NOTE: the maxStack value computed by this method allocates two slots for long / double values, + * as required by the JVM spec. For running an Analyzer, one slot per long / double would be fine. + * See comment in `analysis` package object. + */ + def computeMaxLocalsMaxStack(method: MethodNode): Unit = { + if (!maxLocalsMaxStackComputed(method)) { + method.maxLocals = 0 + method.maxStack = 0 + val cw = new ClassWriter(ClassWriter.COMPUTE_MAXS) + val excs = method.exceptions.asScala.toArray + val mw = cw.visitMethod(method.access, method.name, method.desc, method.signature, excs).asInstanceOf[MethodWriter] + method.accept(mw) + method.maxLocals = mw.getMaxLocals + method.maxStack = mw.getMaxStack + maxLocalsMaxStackComputed += method + } + } /** * Remove unreachable code from a method. * * This implementation only removes instructions that are unreachable for an ASM analyzer / * interpreter. This ensures that future analyses will not produce `null` frames. The inliner - * and call graph builder depend on this property. 
+ * depends on this property. * * @return A set containing the eliminated instructions */ def minimalRemoveUnreachableCode(method: MethodNode, ownerClassName: InternalName): Set[AbstractInsnNode] = { if (method.instructions.size == 0) return Set.empty // fast path for abstract methods if (unreachableCodeEliminated(method)) return Set.empty // we know there is no unreachable code + if (!AsmAnalyzer.sizeOKForBasicValue(method)) return Set.empty // the method is too large for running an analyzer // For correctness, after removing unreachable code, we have to eliminate empty exception // handlers, see scaladoc of def methodOptimizations. Removing an live handler may render more @@ -137,9 +170,10 @@ class LocalOpt[BT <: BTypes](val btypes: BT) { // This triggers "ClassFormatError: Illegal exception table range in class file C". Similar // for local variables in dead blocks. Maybe that's a bug in the ASM framework. + def canRunDCE = AsmAnalyzer.sizeOKForBasicValue(method) def removalRound(): Boolean = { // unreachable-code, empty-handlers and simplify-jumps run until reaching a fixpoint (see doc on class LocalOpt) - val (codeRemoved, handlersRemoved, liveHandlerRemoved) = if (compilerSettings.YoptUnreachableCode) { + val (codeRemoved, handlersRemoved, liveHandlerRemoved) = if (compilerSettings.YoptUnreachableCode && canRunDCE) { val (removedInstructions, liveLabels) = removeUnreachableCodeImpl(method, ownerClassName) val removedHandlers = removeEmptyExceptionHandlers(method) (removedInstructions.nonEmpty, removedHandlers.nonEmpty, removedHandlers.exists(h => liveLabels(h.start))) @@ -179,47 +213,55 @@ class LocalOpt[BT <: BTypes](val btypes: BT) { codeHandlersOrJumpsChanged || localsRemoved || lineNumbersRemoved || labelsRemoved } -} - -object LocalOptImpls { /** * Removes unreachable basic blocks. * - * TODO: rewrite, don't use computeMaxLocalsMaxStack (runs a ClassWriter) / Analyzer. Too slow. 
- * * @return A set containing eliminated instructions, and a set containing all live label nodes. */ def removeUnreachableCodeImpl(method: MethodNode, ownerClassName: InternalName): (Set[AbstractInsnNode], Set[LabelNode]) = { - // The data flow analysis requires the maxLocals / maxStack fields of the method to be computed. - computeMaxLocalsMaxStack(method) - val a = new Analyzer(new BasicInterpreter) - a.analyze(ownerClassName, method) - val frames = a.getFrames + val a = new AsmAnalyzer(method, ownerClassName) + val frames = a.analyzer.getFrames - val initialSize = method.instructions.size var i = 0 var liveLabels = Set.empty[LabelNode] var removedInstructions = Set.empty[AbstractInsnNode] + var maxLocals = Type.getArgumentsAndReturnSizes(method.desc) >> 2 - (if (BytecodeUtils.isStaticMethod(method)) 1 else 0) + var maxStack = 0 val itr = method.instructions.iterator() while (itr.hasNext) { - itr.next() match { - case l: LabelNode => - if (frames(i) != null) liveLabels += l + val insn = itr.next() + val isLive = frames(i) != null + if (isLive) maxStack = math.max(maxStack, frames(i).getStackSize) - case ins => + insn match { + case l: LabelNode => // label nodes are not removed: they might be referenced for example in a LocalVariableNode - if (frames(i) == null || ins.getOpcode == Opcodes.NOP) { + if (isLive) liveLabels += l + + case v: VarInsnNode if isLive => + val longSize = if (isSize2LoadOrStore(v.getOpcode)) 1 else 0 + maxLocals = math.max(maxLocals, v.`var` + longSize + 1) // + 1 becauase local numbers are 0-based + + case i: IincInsnNode if isLive => + maxLocals = math.max(maxLocals, i.`var` + 1) + + case _ => + if (!isLive || insn.getOpcode == Opcodes.NOP) { // Instruction iterators allow removing during iteration. // Removing is O(1): instructions are doubly linked list elements. 
itr.remove() - removedInstructions += ins + removedInstructions += insn } } i += 1 } + method.maxLocals = maxLocals + method.maxStack = maxStack (removedInstructions, liveLabels) } +} +object LocalOptImpls { /** * Remove exception handlers that cover empty code blocks. A block is considered empty if it * consist only of labels, frames, line numbers, nops and gotos. @@ -312,10 +354,7 @@ object LocalOptImpls { // Add the index of the local variable used by `varIns` to the `renumber` array. def addVar(varIns: VarInsnNode): Unit = { val index = varIns.`var` - val isWide = (varIns.getOpcode: @switch) match { - case Opcodes.LLOAD | Opcodes.DLOAD | Opcodes.LSTORE | Opcodes.DSTORE => true - case _ => false - } + val isWide = isSize2LoadOrStore(varIns.getOpcode) // Ensure the length of `renumber`. Unused variable indices are mapped to -1. val minLength = if (isWide) index + 2 else index + 1 diff --git a/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala b/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala index 3422167d02..74d152a4cf 100644 --- a/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala +++ b/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala @@ -270,19 +270,20 @@ trait ScalaSettings extends AbsScalaSettings def YoptInlinerEnabled = YoptInlineProject || YoptInlineGlobal def YoptBuildCallGraph = YoptInlinerEnabled || YoptClosureElimination - def YoptAddToBytecodeRepository = YoptInlinerEnabled || YoptClosureElimination + def YoptAddToBytecodeRepository = YoptBuildCallGraph || YoptInlinerEnabled || YoptClosureElimination val YoptInlineHeuristics = ChoiceSetting( name = "-Yopt-inline-heuristics", helpArg = "strategy", descr = "Set the heuristics for inlining decisions.", - choices = List("at-inline-annotated", "everything"), - default = "at-inline-annotated") + choices = List("at-inline-annotated", "everything", "default"), + default = "default") object YoptWarningsChoices extends MultiChoiceEnumeration { val none = Choice("none" , "No 
optimizer warnings.") val atInlineFailedSummary = Choice("at-inline-failed-summary" , "One-line summary if there were @inline method calls that could not be inlined.") val atInlineFailed = Choice("at-inline-failed" , "A detailed warning for each @inline method call that could not be inlined.") + val anyInlineFailed = Choice("any-inline-failed" , "A detailed warning for every callsite that was chosen for inlining by the heuristics, but could not be inlined.") val noInlineMixed = Choice("no-inline-mixed" , "In mixed compilation, warn at callsites methods defined in java sources (the inlining decision cannot be made without bytecode).") val noInlineMissingBytecode = Choice("no-inline-missing-bytecode" , "Warn if an inlining decision cannot be made because a the bytecode of a class or member cannot be found on the compilation classpath.") val noInlineMissingScalaInlineInfoAttr = Choice("no-inline-missing-attribute", "Warn if an inlining decision cannot be made because a Scala classfile does not have a ScalaInlineInfo attribute.") @@ -301,7 +302,8 @@ trait ScalaSettings extends AbsScalaSettings def YoptWarningEmitAtInlineFailed = !YoptWarnings.isSetByUser || YoptWarnings.contains(YoptWarningsChoices.atInlineFailedSummary) || - YoptWarnings.contains(YoptWarningsChoices.atInlineFailed) + YoptWarnings.contains(YoptWarningsChoices.atInlineFailed) || + YoptWarnings.contains(YoptWarningsChoices.anyInlineFailed) def YoptWarningNoInlineMixed = YoptWarnings.contains(YoptWarningsChoices.noInlineMixed) def YoptWarningNoInlineMissingBytecode = YoptWarnings.contains(YoptWarningsChoices.noInlineMissingBytecode) diff --git a/src/library/scala/collection/immutable/List.scala b/src/library/scala/collection/immutable/List.scala index 7b1997252d..eb095dbbc2 100644 --- a/src/library/scala/collection/immutable/List.scala +++ b/src/library/scala/collection/immutable/List.scala @@ -266,7 +266,6 @@ sealed abstract class List[+A] extends AbstractSeq[A] (b.toList, these) } - @noinline // TODO - 
fix optimizer bug that requires noinline (see SI-8334) final override def map[B, That](f: A => B)(implicit bf: CanBuildFrom[List[A], B, That]): That = { if (bf eq List.ReusableCBF) { if (this eq Nil) Nil.asInstanceOf[That] else { @@ -285,7 +284,6 @@ sealed abstract class List[+A] extends AbstractSeq[A] else super.map(f) } - @noinline // TODO - fix optimizer bug that requires noinline for map; applied here to be safe (see SI-8334) final override def collect[B, That](pf: PartialFunction[A, B])(implicit bf: CanBuildFrom[List[A], B, That]): That = { if (bf eq List.ReusableCBF) { if (this eq Nil) Nil.asInstanceOf[That] else { @@ -315,7 +313,6 @@ sealed abstract class List[+A] extends AbstractSeq[A] else super.collect(pf) } - @noinline // TODO - fix optimizer bug that requires noinline for map; applied here to be safe (see SI-8334) final override def flatMap[B, That](f: A => GenTraversableOnce[B])(implicit bf: CanBuildFrom[List[A], B, That]): That = { if (bf eq List.ReusableCBF) { if (this eq Nil) Nil.asInstanceOf[That] else { diff --git a/src/reflect/scala/reflect/internal/Definitions.scala b/src/reflect/scala/reflect/internal/Definitions.scala index a3d9368915..5ce5c39145 100644 --- a/src/reflect/scala/reflect/internal/Definitions.scala +++ b/src/reflect/scala/reflect/internal/Definitions.scala @@ -797,7 +797,9 @@ trait Definitions extends api.StandardDefinitions { * The class defining the method is a supertype of `tp` that * has a public no-arg primary constructor. */ - def samOf(tp: Type): Symbol = if (!settings.Xexperimental) NoSymbol else { + def samOf(tp: Type): Symbol = if (!settings.Xexperimental) NoSymbol else findSam(tp) + + def findSam(tp: Type): Symbol = { // if tp has a constructor, it must be public and must not take any arguments // (not even an implicit argument list -- to keep it simple for now) val tpSym = tp.typeSymbol |