Copy propagation, remove unused values (closures!) and local variables

Copy propagation uses an AliasingAnalyzer: it replaces a `LOAD n` instruction by `LOAD m` where m is the smallest alias of n. This leads to stale STORE instructions. Stale STOREs are identified using a ProdCons analyzer and replaced by POPs. Values that are pushed on the stack by a side-effect free instruction and consumed by a POP are then removed by `eliminatePushPop`. This includes elimination of unused closure allocations and unused boxes and tuple allocations (*). A final cleanup eliminates `STORE x; LOADx` pairs where the stored value is not otherwise used. Fixes - https://github.com/scala/scala-dev/issues/25 - https://github.com/scala/scala-dev/issues/7 - https://github.com/scala/scala-dev/issues/14 - https://github.com/scala/scala-dev/issues/12 (*) We don't yet rewrite reads of boxes and tuples yet. For example, `val x = (1, 2); x._1` remains a method invocation and the tuple cannot be eliminated (https://github.com/scala/scala-dev/issues/11). Inspired in many ways by Miguel's work!
author: Lukas Rytz <lukas.rytz@gmail.com> 2015-11-06 09:38:21 +0100
committer: Lukas Rytz <lukas.rytz@gmail.com> 2015-11-10 11:55:36 +0100
commit: 29d772d0655a3a5af79c3d8eca91bb33a82c7194 (patch)
tree: dce60b333d062b031b5ac57e8a1924c04b4a80c3 /src/compiler
parent: f777790250ffe92b65b51c71c7da113659177ad0 (diff)
download: scala-29d772d0655a3a5af79c3d8eca91bb33a82c7194.tar.gz
scala-29d772d0655a3a5af79c3d8eca91bb33a82c7194.tar.bz2
scala-29d772d0655a3a5af79c3d8eca91bb33a82c7194.zip
4 files changed, 920 insertions, 58 deletions
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/BackendUtils.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/BackendUtils.scala
index b02bc7c96e..204c6a33ce 100644
--- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/BackendUtils.scala
+++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/BackendUtils.scala
@@ -47,6 +47,7 @@ class BackendUtils[BT <: BTypes](val btypes: BT) {
     private val basicValueSizeLimit  = 9000l * 1000l * 1000l
     private val sourceValueSizeLimit = 8000l * 950l  * 950l
 
+    def sizeOKForAliasing(method: MethodNode): Boolean = size(method) < nullnessSizeLimit
     def sizeOKForNullness(method: MethodNode): Boolean = size(method) < nullnessSizeLimit
     def sizeOKForBasicValue(method: MethodNode): Boolean = size(method) < basicValueSizeLimit
     def sizeOKForSourceValue(method: MethodNode): Boolean = size(method) < sourceValueSizeLimit
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/BytecodeUtils.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/BytecodeUtils.scala
index 862621c7c4..3854bc840d 100644
--- a/src/compiler/scala/tools/nsc/backend/jvm/opt/BytecodeUtils.scala
+++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/BytecodeUtils.scala
@@ -17,6 +17,7 @@ import scala.tools.asm.Opcodes._
 import scala.tools.asm.tree._
 import GenBCode._
 import scala.collection.convert.decorateAsScala._
+import scala.tools.nsc.backend.jvm.BTypes.InternalName
 import scala.tools.nsc.backend.jvm.analysis.InstructionStackEffect
 
 object BytecodeUtils {
@@ -111,11 +112,16 @@ object BytecodeUtils {
 
   def isReference(t: Type) = t.getSort == Type.OBJECT || t.getSort == Type.ARRAY
 
-  def nextExecutableInstruction(instruction: AbstractInsnNode, alsoKeep: AbstractInsnNode => Boolean = Set()): Option[AbstractInsnNode] = {
-    var result = instruction
-    do { result = result.getNext }
-    while (result != null && !isExecutable(result) && !alsoKeep(result))
-    Option(result)
+  @tailrec def nextExecutableInstruction(insn: AbstractInsnNode, alsoKeep: AbstractInsnNode => Boolean = Set()): Option[AbstractInsnNode] = {
+    val next = insn.getNext
+    if (next == null || isExecutable(next) || alsoKeep(next)) Option(next)
+    else nextExecutableInstruction(next, alsoKeep)
+  }
+
+  @tailrec def nextExecutableInstructionOrLabel(insn: AbstractInsnNode): Option[AbstractInsnNode] = {
+    val next = insn.getNext
+    if (next == null || isExecutable(next) || next.isInstanceOf[LabelNode]) Option(next)
+    else nextExecutableInstructionOrLabel(next)
   }
 
   def sameTargetExecutableInstruction(a: JumpInsnNode, b: JumpInsnNode): Boolean = {
@@ -330,6 +336,149 @@ object BytecodeUtils {
     }
   }
 
+  def isSideEffectFreeCall(insn: MethodInsnNode): Boolean = {
+    isScalaBox(insn) || isScalaUnbox(insn) ||
+      isJavaBox(insn) || // not java unbox, it may NPE
+      isSideEffectFreeConstructorCall(insn)
+  }
+
+  private val srBoxesRuntimeName = "scala/runtime/BoxesRunTime"
+
+  def isScalaBox(insn: MethodInsnNode): Boolean = {
+    insn.owner == srBoxesRuntimeName && {
+      val args = Type.getArgumentTypes(insn.desc)
+      args.length == 1 && ((args(0).getSort: @switch) match {
+        case Type.BOOLEAN => insn.name == "boxToBoolean"   && insn.desc == "(Z)Ljava/lang/Boolean;"
+        case Type.BYTE    => insn.name == "boxToByte"      && insn.desc == "(B)Ljava/lang/Byte;"
+        case Type.CHAR    => insn.name == "boxToCharacter" && insn.desc == "(C)Ljava/lang/Character;"
+        case Type.SHORT   => insn.name == "boxToShort"     && insn.desc == "(S)Ljava/lang/Short;"
+        case Type.INT     => insn.name == "boxToInteger"   && insn.desc == "(I)Ljava/lang/Integer;"
+        case Type.LONG    => insn.name == "boxToLong"      && insn.desc == "(J)Ljava/lang/Long;"
+        case Type.FLOAT   => insn.name == "boxToFloat"     && insn.desc == "(F)Ljava/lang/Float;"
+        case Type.DOUBLE  => insn.name == "boxToDouble"    && insn.desc == "(D)Ljava/lang/Double;"
+        case _ => false
+      })
+    }
+  }
+
+  def isScalaUnbox(insn: MethodInsnNode): Boolean = {
+    insn.owner == srBoxesRuntimeName && ((Type.getReturnType(insn.desc).getSort: @switch) match {
+      case Type.BOOLEAN => insn.name == "unboxToBoolean" && insn.desc == "(Ljava/lang/Object;)Z"
+      case Type.BYTE    => insn.name == "unboxToByte"   && insn.desc == "(Ljava/lang/Object;)B"
+      case Type.CHAR    => insn.name == "unboxToChar"   && insn.desc == "(Ljava/lang/Object;)C"
+      case Type.SHORT   => insn.name == "unboxToShort"  && insn.desc == "(Ljava/lang/Object;)S"
+      case Type.INT     => insn.name == "unboxToInt"    && insn.desc == "(Ljava/lang/Object;)I"
+      case Type.LONG    => insn.name == "unboxToLong"   && insn.desc == "(Ljava/lang/Object;)J"
+      case Type.FLOAT   => insn.name == "unboxToFloat"  && insn.desc == "(Ljava/lang/Object;)F"
+      case Type.DOUBLE  => insn.name == "unboxToDouble" && insn.desc == "(Ljava/lang/Object;)D"
+      case _ => false
+    })
+  }
+
+  def isJavaBox(insn: MethodInsnNode): Boolean = {
+    insn.name == "valueOf" && {
+      val args = Type.getArgumentTypes(insn.desc)
+      args.length == 1 && ((args(0).getSort: @switch) match {
+        case Type.BOOLEAN => insn.owner == "java/lang/Boolean"   && insn.desc == "(Z)Ljava/lang/Boolean;"
+        case Type.BYTE    => insn.owner == "java/lang/Byte"      && insn.desc == "(B)Ljava/lang/Byte;"
+        case Type.CHAR    => insn.owner == "java/lang/Character" && insn.desc == "(C)Ljava/lang/Character;"
+        case Type.SHORT   => insn.owner == "java/lang/Short"     && insn.desc == "(S)Ljava/lang/Short;"
+        case Type.INT     => insn.owner == "java/lang/Integer"   && insn.desc == "(I)Ljava/lang/Integer;"
+        case Type.LONG    => insn.owner == "java/lang/Long"      && insn.desc == "(J)Ljava/lang/Long;"
+        case Type.FLOAT   => insn.owner == "java/lang/Float"     && insn.desc == "(F)Ljava/lang/Float;"
+        case Type.DOUBLE  => insn.owner == "java/lang/Double"    && insn.desc == "(D)Ljava/lang/Double;"
+        case _ => false
+      })
+    }
+  }
+
+  // unused objects created by these constructors are eliminated by pushPop
+  private val sideEffectFreeConstructors = Set(
+    "java/lang/Object()V",
+    "java/lang/String()V",
+    "java/lang/String(Ljava/lang/String;)V",
+    "java/lang/String([C)V",
+
+    "java/lang/Boolean(Z)V",
+    "java/lang/Byte(B)V",
+    "java/lang/Character(C)V",
+    "java/lang/Short(S)V",
+    "java/lang/Integer(I)V",
+    "java/lang/Long(J)V",
+    "java/lang/Float(F)V",
+    "java/lang/Double(D)V",
+
+    "scala/runtime/ObjectRef(Ljava/lang/Object;)V",
+    "scala/runtime/BooleanRef(Z)V",
+    "scala/runtime/ByteRef(B)V",
+    "scala/runtime/CharRef(C)V",
+    "scala/runtime/ShortRef(S)V",
+    "scala/runtime/IntRef(I)V",
+    "scala/runtime/LongRef(J)V",
+    "scala/runtime/FloatRef(F)V",
+    "scala/runtime/DoubleRef(D)V",
+
+    "scala/runtime/VolatileObjectRef(Ljava/lang/Object;)V",
+    "scala/runtime/VolatileBooleanRef(Z)V",
+    "scala/runtime/VolatileByteRef(B)V",
+    "scala/runtime/VolatileCharRef(C)V",
+    "scala/runtime/VolatileShortRef(S)V",
+    "scala/runtime/VolatileIntRef(I)V",
+    "scala/runtime/VolatileLongRef(J)V",
+    "scala/runtime/VolatileFloatRef(F)V",
+    "scala/runtime/VolatileDoubleRef(D)V"
+  ) ++ {
+    (1 to 22).map(n => "scala/Tuple" + n + "(" + ("Ljava/lang/Object;" * n) + ")V")
+  } ++ {
+    Iterator("I", "J", "D").map(t => "scala/Tuple1$mc" + t + "$sp(" + t + ")V")
+  } ++ {
+    def tuple2Specs = Iterator("I", "J", "D", "C", "Z")
+    for (a <- tuple2Specs; b <- tuple2Specs) yield "scala/Tuple2$mc" + a + b + "$sp(" + a + b + ")V"
+  }
+
+  def isSideEffectFreeConstructorCall(insn: MethodInsnNode): Boolean = {
+    insn.name == INSTANCE_CONSTRUCTOR_NAME && sideEffectFreeConstructors(insn.owner + insn.desc)
+  }
+
+  private val classesForSideEffectFreeConstructors = sideEffectFreeConstructors.map(s => s.substring(0, s.indexOf('(')))
+
+  // we only eliminate `NEW C` if the class C has a constructor that we consider side-effect free.
+  // removing a `NEW` eliminates a potential NoClassDefFoundError, so we only do it for core classes.
+  def isNewForSideEffectFreeConstructor(insn: AbstractInsnNode) = {
+    insn.getOpcode == NEW && {
+      val ti = insn.asInstanceOf[TypeInsnNode]
+      classesForSideEffectFreeConstructors(ti.desc)
+    }
+  }
+
+  def isBoxedUnit(insn: AbstractInsnNode) = {
+    insn.getOpcode == GETSTATIC && {
+      val fi = insn.asInstanceOf[FieldInsnNode]
+      fi.owner == "scala/runtime/BoxedUnit" && fi.name == "UNIT" && fi.desc == "Lscala/runtime/BoxedUnit;"
+    }
+  }
+
+  private def buildFunctionTypes(base: String): Set[InternalName] = {
+    def primitives = Iterator("B", "S", "I", "J", "C", "F", "D", "Z", "V")
+    def ijfd = Iterator("I", "J", "F", "D")
+    def ijd = Iterator("I", "J", "D")
+    Set.empty[String] ++ {
+      (0 to 22).map(base + _)
+    } ++ {
+      primitives.map(base + "0$mc" + _ + "$sp") // Function0
+    } ++ {
+      for (a <- ijfd; b <- ijfd) yield base + "1$mc" + a + b + "$sp" // Function1
+    } ++ {
+      for (a <- ijd; b <- ijd; c <- ijd) yield base + "2$mc" + a + b + c + "$sp" // Function2
+    }
+  }
+
+  private val srJFunctionTypes: Set[InternalName] = buildFunctionTypes("scala/runtime/java8/JFunction")
+  def isrJFunctionType(internalName: InternalName): Boolean = srJFunctionTypes(internalName)
+
+  private val sFunctionTypes: Set[InternalName] = buildFunctionTypes("scala/Function")
+  def isScalaFunctionType(internalName: InternalName): Boolean = sFunctionTypes(internalName)
+
   implicit class AnalyzerExtensions[V <: Value](val analyzer: Analyzer[V]) extends AnyVal {
     def frameAt(instruction: AbstractInsnNode, methodNode: MethodNode): Frame[V] = analyzer.getFrames()(methodNode.instructions.indexOf(instruction))
   }
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala
index f1392f7b7c..5ad6910d5e 100644
--- a/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala
+++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala
@@ -7,44 +7,102 @@ package scala.tools.nsc
 package backend.jvm
 package opt
 
-import scala.annotation.switch
-import scala.tools.asm.{Type, ClassWriter, MethodWriter, Opcodes}
+import scala.annotation.{tailrec, switch}
+import scala.tools.asm.Type
+import scala.tools.asm.tree.analysis.BasicInterpreter
 import scala.tools.asm.Opcodes._
 import scala.tools.asm.tree._
+import scala.collection.{mutable, immutable}
 import scala.collection.convert.decorateAsScala._
 import scala.tools.nsc.backend.jvm.BTypes.InternalName
+import scala.tools.nsc.backend.jvm.analysis._
 import scala.tools.nsc.backend.jvm.opt.BytecodeUtils._
 
 /**
- * Optimizations within a single method.
+ * Optimizations within a single method. Certain optimizations enable others, for example removing
+ * unreachable code can render a `try` block empty and enable removeEmptyExceptionHandlers. The
+ * latter in turn enables more unreachable code to be eliminated (the `catch` block), so there is
+ * a cyclic dependency. Optimizations that depend on each other are therefore executed in a loop
+ * until reaching a fixpoint.
  *
- * unreachable code
- *   - removes instructions of basic blocks to which no branch instruction points
- *   + enables eliminating some exception handlers and local variable descriptors
- *     > eliminating them is required for correctness, as explained in `removeUnreachableCode`
+ * The optimizations marked UPSTREAM enable optimizations that were already executed, so they cause
+ * another iteration in the fixpoint loop.
  *
- * empty exception handlers
- *   - removes exception handlers whose try block is empty
- *   + eliminating a handler where the try block is empty and reachable will turn the catch block
- *     unreachable. in this case "unreachable code" is invoked recursively until reaching a fixpoint.
- *     > for try blocks that are unreachable, "unreachable code" removes also the instructions of the
- *       catch block, and the recursive invocation is not necessary.
+ * unreachable code / DCE (removes instructions of basic blocks to which there is no branch)
+ *   + enables downstream:
+ *     - stale stores (loads may be eliminated, removing consumers of a store)
+ *     - empty handlers (try blocks may become empty)
+ *     - simplify jumps (goto l; [dead code]; l: ..) => remove goto
+ *     - stale local variable descriptors
  *
- * simplify jumps
- *   - various simplifications, see doc comments of individual optimizations
- *   + changing or eliminating jumps may render some code unreachable, therefore "simplify jumps" is
- *     executed in a loop with "unreachable code"
+ *   note that eliminating empty handlers and stale local variable descriptors is required for
+ *   correctness, see the comment in the body of `methodOptimizations`.
  *
- * empty local variable descriptors
- *   - removes entries from the local variable table where the variable is not actually used
- *   + enables eliminating labels that the entry points to (if they are not otherwise referenced)
+ * copy propagation (replaces LOAD n to the LOAD m for the smallest m that is an alias of n)
+ *   + enables downstrem:
+ *     - stale stores (a stored value may not be loaded anymore)
+ *     - store-load pairs (a load n may now be right after a store n)
+ *   + NOTE: copy propagation is only executed once, in the first fixpoint loop iteration. none of
+ *     the other optimizations enables further copy prop. we still run it as part of the loop
+ *     because it requires unreachable code to be eliminated.
  *
- * empty line numbers
- *   - eliminates line number nodes that describe no executable instructions
- *   + enables eliminating the label of the line number node (if it's not otherwise referenced)
+ * stale stores (replace STORE by POP)
+ *   + enables downstream:
+ *     - push-pop (the new pop may be the single consumer for an instruction)
  *
- * stale labels
- *   - eliminate labels that are not referenced, merge sequences of label definitions.
+ * push-pop (when a POP is the only consumer of a value, remove the POP and its producer)
+ *   + eanbles UPSTREAM:
+ *     - stale stores (if a LOAD is removed, a corresponding STORE may become stale)
+ *   + enables downstream:
+ *     - store-load pairs (a variable may become non-live)
+ *     - stale handlers (push-pop removes code)
+ *     - simplify jumps (push-pop removes code)
+ *
+ * store-load pairs (remove `STORE x; LOAD x` if x is otherwise not used in the method)
+ *   + enables downstream:
+ *     - empty handlers (code is removes, a try block may become empty
+ *     - simplify jumps (code is removed, a goto may become redundant for example)
+ *     - stale local variable descriptors
+ *
+ * empty handlers (removes exception handlers whose try block is empty)
+ *   + enables UPSTREAM:
+ *     - unreachable code (catch block becomes unreachable)
+ *   + enables downstream:
+ *     - simplify jumps
+ *
+ * simplify jumps (various, like `GOTO l; l: ...`, see doc comments of individual optimizations)
+ *   + enables UPSTREAM
+ *     - unreachable code (`GOTO a; a: GOTO b; b: ...`, the first jump is changed to `GOTO b`, the second becomes unreachable)
+ *     - store-load pairs (a `GOTO l; l: ...` is removed between store and load)
+ *
+ *
+ * The following cleanup optimizations don't enable any upstream optimizations, so they can be
+ * executed once at the end, when the above optimizations reach a fixpoint.
+ *
+ *
+ * empty local variable descriptors (removes unused variables from the local variable table)
+ *   + enables downstream:
+ *     - stale labels (labels that the entry points to, if not otherwise referenced)
+ *
+ * empty line numbers (eliminates line number nodes that describe no executable instructions)
+ *   + enables downstream:
+ *     - stale labels (label of the line number node, if not otherwise referenced)
+ *
+ * stale labels (eliminate labels that are not referenced, merge sequences of label definitions)
+ *
+ *
+ * Note on a method's maxLocals / maxStack: the backend only uses those values for running
+ * Analyzers. The values can be conservative approximations: if an optimization removes code and
+ * the maximal stack size is now smaller, the larger maxStack value will still work fine for
+ * running an Analyzer (just that frames allocate more space than required). The correct max
+ * values written to the bytecode are re-computed during classfile serialization.
+ * To keep things simpler, we don't update the max values in every optimization:
+ *   - we do it in `removeUnreachableCodeImpl`, because it's quite straightforward
+ *   - maxLocals is updated in `compactLocalVariables`, which runs at the end of method optimizations
+ *
+ *
+ * Note on updating the call graph: whenever an optimization eliminates a callsite or a closure
+ * instantiation, we eliminate the corresponding entry from the call graph.
  */
 class LocalOpt[BT <: BTypes](val btypes: BT) {
   import LocalOptImpls._
@@ -61,6 +119,9 @@ class LocalOpt[BT <: BTypes](val btypes: BT) {
    * @return A set containing the eliminated instructions
    */
   def minimalRemoveUnreachableCode(method: MethodNode, ownerClassName: InternalName): Boolean = {
+    // In principle, for the inliner, a single removeUnreachableCodeImpl would be enough. But that
+    // would potentially leave behind stale handlers (empty try block) which is not legal in the
+    // classfile. So we run both removeUnreachableCodeImpl and removeEmptyExceptionHandlers.
     if (method.instructions.size == 0) return false     // fast path for abstract methods
     if (unreachableCodeEliminated(method)) return false // we know there is no unreachable code
     if (!AsmAnalyzer.sizeOKForBasicValue(method)) return false // the method is too large for running an analyzer
@@ -97,15 +158,7 @@ class LocalOpt[BT <: BTypes](val btypes: BT) {
   }
 
   /**
-   * Remove unreachable code from a method.
-   *
-   * We rely on dead code elimination provided by the ASM framework, as described in the ASM User
-   * Guide (http://asm.ow2.org/index.html), Section 8.2.1. It runs a data flow analysis, which only
-   * computes Frame information for reachable instructions. Instructions for which no Frame data is
-   * available after the analysis are unreachable.
-   *
-   * Also simplifies branching instructions, removes unused local variable descriptors, empty
-   * exception handlers, unnecessary label declarations and empty line number nodes.
+   * Run method-level optimizations, see comment on class [[LocalOpt]].
    *
    * Returns `true` if the bytecode of `method` was changed.
    */
@@ -138,28 +191,53 @@ class LocalOpt[BT <: BTypes](val btypes: BT) {
     // This triggers "ClassFormatError: Illegal exception table range in class file C". Similar
     // for local variables in dead blocks. Maybe that's a bug in the ASM framework.
 
-    def removalRound(): Boolean = {
-      // unreachable-code, empty-handlers and simplify-jumps run until reaching a fixpoint (see doc on class LocalOpt)
-      val (codeRemoved, handlersRemoved, liveHandlerRemoved) = {
-        val (insnsRemoved, liveLabels) = removeUnreachableCodeImpl(method, ownerClassName)
-        val removedHandlers = removeEmptyExceptionHandlers(method)
-        (insnsRemoved, removedHandlers.nonEmpty, removedHandlers.exists(h => liveLabels(h.start)))
-      }
+    /**
+     * Runs the optimizations that depend on each other in a loop until reaching a fixpoint. See
+     * comment in class [[LocalOpt]].
+     */
+    def removalRound(runDCE: Boolean, runCopyProp: Boolean, maxRecursion: Int = 10): Boolean = {
+      if (maxRecursion == 0) return false
+
+      val (codeRemoved, liveLabels) = if (runDCE) removeUnreachableCodeImpl(method, ownerClassName) else (false, Set.empty[LabelNode])
+
+      // runCopyProp is only true in the first iteration; there's no optimization below that enables
+      // further copy propagation. it is still part of the fixpoint loop because it needs to run after DCE.
+      val copyPropChanged = if (runCopyProp) {
+        if (!runDCE) minimalRemoveUnreachableCode(method, ownerClassName) // copyProp (AliasingAnalyzer) requires DCE
+        copyProp(method, ownerClassName)
+      } else false
+
+      val storesRemoved = if (compilerSettings.YoptCopyPropagation) eliminateStaleStores(method, ownerClassName) else false
+
+      val pushPopRemoved = if (compilerSettings.YoptCopyPropagation) eliminatePushPop(method, ownerClassName) else false
+
+      val storeLoadRemoved = if (compilerSettings.YoptCopyPropagation) eliminateStoreLoad(method) else false
+
+      val removedHandlers = if (runDCE) removeEmptyExceptionHandlers(method) else Set.empty[TryCatchBlockNode]
+      val handlersRemoved = removedHandlers.nonEmpty
+      val liveHandlerRemoved = removedHandlers.exists(h => liveLabels(h.start))
 
       val jumpsChanged = if (compilerSettings.YoptSimplifyJumps) simplifyJumps(method) else false
 
-      // Eliminating live handlers and simplifying jump instructions may render more code
-      // unreachable, so we need to run another round.
-      if (liveHandlerRemoved || jumpsChanged) removalRound()
+      // See doc comment in the beginning of this file (optimizations marked UPSTREAM)
+      if (liveHandlerRemoved || jumpsChanged) {
+        // changing live handlers or jumps enables DCE to remove more code, so runDCE = true
+        removalRound(runDCE = true, runCopyProp = false, maxRecursion = maxRecursion - 1)
+      } else if (pushPopRemoved) {
+        // pushPop doesn't enable DCE, but other optimizations (stale stores). so we iterate, but without DCE.
+        removalRound(runDCE = false, runCopyProp = false, maxRecursion = maxRecursion - 1)
+      }
 
-      codeRemoved || handlersRemoved || jumpsChanged
+      codeRemoved || copyPropChanged || storesRemoved || pushPopRemoved || storeLoadRemoved || handlersRemoved || jumpsChanged
     }
 
-    val codeHandlersOrJumpsChanged = if (compilerSettings.YoptUnreachableCode && AsmAnalyzer.sizeOKForBasicValue(method)) {
+    val dceCopyPropHandlersOrJumpsChanged = if (AsmAnalyzer.sizeOKForBasicValue(method)) {
       // we run DCE even if the method is already in the `unreachableCodeEliminated` map: the DCE
       // here is more thorough than `minimalRemoveUnreachableCode`
-      val r = removalRound()
-      unreachableCodeEliminated += method
+      val r = removalRound(
+        runDCE = compilerSettings.YoptUnreachableCode,
+        runCopyProp = compilerSettings.YoptCopyPropagation)
+      if (compilerSettings.YoptUnreachableCode) unreachableCodeEliminated += method
       r
     } else false
 
@@ -179,9 +257,7 @@ class LocalOpt[BT <: BTypes](val btypes: BT) {
     assert(nullOrEmpty(method.visibleLocalVariableAnnotations), method.visibleLocalVariableAnnotations)
     assert(nullOrEmpty(method.invisibleLocalVariableAnnotations), method.invisibleLocalVariableAnnotations)
 
-    unreachableCodeEliminated += method
-
-    codeHandlersOrJumpsChanged || localsRemoved || lineNumbersRemoved || labelsRemoved
+    dceCopyPropHandlersOrJumpsChanged || localsRemoved || lineNumbersRemoved || labelsRemoved
   }
 
   /**
@@ -235,7 +311,641 @@ class LocalOpt[BT <: BTypes](val btypes: BT) {
     method.maxStack  = maxStack
     (changed, liveLabels)
   }
+
+  /**
+   * For every `xLOAD n`, find all local variable slots that are aliases of `n` using an
+   * AliasingAnalyzer and change the instruction to `xLOAD m` where `m` is the smallest alias.
+   * This leaves behind potentially stale `xSTORE n` instructions, which are then eliminated
+   * by [[eliminateStaleStores]].
+   */
+  def copyProp(method: MethodNode, owner: InternalName): Boolean = {
+    AsmAnalyzer.sizeOKForAliasing(method) && {
+      var changed = false
+      val numParams = parametersSize(method)
+      lazy val aliasAnalysis = new AsmAnalyzer(method, owner, new AliasingAnalyzer(new BasicInterpreter))
+
+      // Remember locals that are used in a `LOAD` instruction. Assume a program has two LOADs:
+      //
+      //   ...
+      //   LOAD 3  // aliases of 3 here: <3>
+      //   ...
+      //   LOAD 1  // aliases of 1 here: <1, 3>
+      //
+      // In this example, we should change the second load from 1 to 3, which might render the
+      // local variable 1 unused.
+      val knownUsed = new Array[Boolean](method.maxLocals)
+
+      def usedOrMinAlias(it: IntIterator, init: Int): Int = {
+        if (knownUsed(init)) init
+        else {
+          var r = init
+          while (it.hasNext) {
+            val n = it.next()
+            // knownUsed.lenght is the number of locals, `n` may be a stack slot
+            if (n < knownUsed.length && knownUsed(n)) return n
+            if (n < r) r = n
+          }
+          r
+        }
+      }
+
+      val it = method.instructions.iterator
+      while (it.hasNext) it.next() match {
+        case vi: VarInsnNode if vi.`var` >= numParams && isLoad(vi) =>
+          val aliases = aliasAnalysis.frameAt(vi).asInstanceOf[AliasingFrame[_]].aliasesOf(vi.`var`)
+          if (aliases.size > 1) {
+            val alias = usedOrMinAlias(aliases.iterator, vi.`var`)
+            if (alias != -1) {
+              changed = true
+              vi.`var` = alias
+            }
+          }
+          knownUsed(vi.`var`) = true
+
+        case _ =>
+      }
+
+      changed
+    }
+  }
+
+  /**
+   * Eliminate `xSTORE` instructions that have no consumer. If the instruction can be completely
+   * eliminated, it is replaced by a POP. The [[eliminatePushPop]] cleans up unnecessary POPs.
+   *
+   * Note that an `ASOTRE` can not always be eliminated: it removes a reference to the object that
+   * is currently stored in that local, which potentially frees it for GC (SI-5313). Therefore
+   * we replace such stores by `POP; ACONST_NULL; ASTORE x`.
+   */
+  def eliminateStaleStores(method: MethodNode, owner: InternalName): Boolean = {
+    AsmAnalyzer.sizeOKForSourceValue(method) && {
+      lazy val prodCons = new ProdConsAnalyzer(method, owner)
+      def hasNoCons(varIns: AbstractInsnNode, slot: Int) = prodCons.consumersOfValueAt(varIns.getNext, slot).isEmpty
+
+      var changed = false
+
+      // insns to delete: IINC that have no consumer
+      val toDelete = mutable.ArrayBuffer.empty[IincInsnNode]
+      // xSTORE insns to be replaced by POP or POP2
+      val storesToDrop = mutable.ArrayBuffer.empty[VarInsnNode]
+      // ASTORE insn that have no consumer.
+      //   - if the local is not live, the store is replaced by POP
+      //   - otherwise, pop the argument value and store NULL instead. Unless the boolean field is
+      //     `true`: then the store argument is already known to be ACONST_NULL.
+      val toNullOut = mutable.ArrayBuffer.empty[(VarInsnNode, Boolean)]
+      // `true` for variables that are known to be live
+      val liveVars = new Array[Boolean](method.maxLocals)
+
+      val it = method.instructions.iterator
+      while (it.hasNext) it.next() match {
+        case vi: VarInsnNode if isStore(vi) && hasNoCons(vi, vi.`var`) =>
+          val canElim = vi.getOpcode != ASTORE || {
+            val currentFieldValueProds = prodCons.initialProducersForValueAt(vi, vi.`var`)
+            currentFieldValueProds.size == 1 && (currentFieldValueProds.head match {
+              case ParameterProducer(0) => !isStaticMethod(method) // current field value is `this`, which won't be gc'd anyway
+              case _: UninitializedLocalProducer => true // field is not yet initialized, so current value cannot leak
+              case _ => false
+            })
+          }
+          if (canElim) storesToDrop += vi
+          else {
+            val prods = prodCons.producersForValueAt(vi, prodCons.frameAt(vi).stackTop)
+            val isStoreNull = prods.size == 1 && prods.head.getOpcode == ACONST_NULL
+            toNullOut += ((vi, isStoreNull))
+          }
+          changed = true
+
+        case ii: IincInsnNode if hasNoCons(ii, ii.`var`) =>
+          toDelete += ii
+          changed = true
+
+        case vi: VarInsnNode =>
+          val longSize = if (isSize2LoadOrStore(vi.getOpcode)) 1 else 0
+          liveVars(vi.`var`) = true
+
+        case ii: IincInsnNode =>
+          liveVars(ii.`var`) = true
+        case _ =>
+      }
+
+      def replaceByPop(vi: VarInsnNode): Unit = {
+        val size = if (isSize2LoadOrStore(vi.getOpcode)) 2 else 1
+        method.instructions.set(vi, getPop(size))
+      }
+
+      toDelete foreach method.instructions.remove
+      storesToDrop foreach replaceByPop
+      for ((vi, isStoreNull) <- toNullOut) {
+        if (!liveVars(vi.`var`)) replaceByPop(vi) // can drop `ASTORE x` where x has only dead stores
+        else {
+          if (!isStoreNull) {
+            val prev = vi.getPrevious
+            method.instructions.insert(prev, new InsnNode(ACONST_NULL))
+            method.instructions.insert(prev, getPop(1))
+          }
+        }
+      }
+
+      changed
+    }
+  }
+
+  /**
+   * When a POP instruction has a single producer, remove the POP and eliminate the producer by
+   * bubbling up the POPs. For example, given
+   *   ILOAD 1; ILOAD 2; IADD; POP
+   * we first eliminate the POP, then the IADD, then its inputs, so the entire sequence goes away.
+   * If a producer cannot be eliminated (need to keep side-effects), a POP is inserted.
+   *
+   * A special case eliminates the creation of unused objects with side-effect-free constructors:
+   *   NEW scala/Tuple1; DUP; ALOAD 0; INVOKESPECIAL scala/Tuple1.<init>; POP
+   * The POP has a signle producer (the DUP), it's easy to eliminate these two. A special case
+   * is needed to eliminate the INVOKESPECIAL and NEW.
+   */
+  def eliminatePushPop(method: MethodNode, owner: InternalName): Boolean = {
+    AsmAnalyzer.sizeOKForSourceValue(method) && {
+      // A queue of instructions producing a value that has to be eliminated. If possible, the
+      // instruction (and its inputs) will be removed, otherwise a POP is inserted after
+      val queue = mutable.Queue.empty[ProducedValue]
+      // Contains constructor invocations for values that can be eliminated if unused.
+      val sideEffectFreeConstructorCalls = mutable.ArrayBuffer.empty[MethodInsnNode]
+
+      // instructions to remove (we don't change the bytecode while analyzing it. this allows
+      // running the ProdConsAnalyzer only once.)
+      val toRemove = mutable.Set.empty[AbstractInsnNode]
+      // instructions to insert before some instruction
+      val toInsertBefore = mutable.Map.empty[AbstractInsnNode, List[InsnNode]]
+      // an instruction to insert after some instruction
+      val toInsertAfter = mutable.Map.empty[AbstractInsnNode, AbstractInsnNode]
+
+      lazy val prodCons = new ProdConsAnalyzer(method, owner)
+
+      /**
+       * True if the values produced by `prod` are all the same. Most instructions produce a single
+       * value. DUP and DUP2 (with a size-2 input) produce two equivalent values. However, there
+       * are some exotic instructions that produce multiple non-equal values (DUP_X1, SWAP, ...).
+       *
+       * Assume we have `DUP_X2; POP`. In order to remove the `POP` we need to change the DUP_X2
+       * into something else, which is not straightforward.
+       *
+       * Since scalac never emits any of those exotic bytecodes, we don't optimize them.
+       */
+      def producerHasSingleOutput(prod: AbstractInsnNode): Boolean = (prod.getOpcode: @switch) match {
+        case DUP_X1 | DUP_X2 | DUP2_X1 | DUP2_X2 | SWAP => false
+        case DUP2 => prodCons.frameAt(prod).peekStack(0).getSize == 2
+        case _ => true
+      }
+
+      // Set.empty if not a single consumer, or if the producer is the exception value of a catch block
+      /**
+       * Returns the producers for the stack value `inputSlot` consumed by `cons`, if the consumer
+       * instruction is the only consumer for all of these producers.
+       *
+       * I a producer has multiple consumers, or the value is the caught exception in a catch
+       * block, this method returns Set.empty.
+       */
+      def producersIfSingleConsumer(cons: AbstractInsnNode, inputSlot: Int): Set[AbstractInsnNode] = {
+        val prods = prodCons.producersForValueAt(cons, inputSlot)
+        val singleConsumer = prods forall {
+          case _: ExceptionProducer[_] =>
+            false // POP of an exception in a catch block cannot be removed
+          case prod =>
+            producerHasSingleOutput(prod) && {
+              // for DUP / DUP2, we only consider the value that is actually consumed by cons
+              val conss = prodCons.consumersOfValueAt(prod.getNext, inputSlot)
+              conss.size == 1 && conss.head == cons
+            }
+        }
+        if (singleConsumer) prods else Set.empty
+      }
+
+      /**
+       * For a POP instruction that is the single consumer of its producers, remove the POP and
+       * enqueue the producers.
+       */
+      def handleInitialPop(pop: AbstractInsnNode): Unit = {
+        val prods = producersIfSingleConsumer(pop, prodCons.frameAt(pop).stackTop)
+        if (prods.nonEmpty) {
+          toRemove += pop
+          val size = if (pop.getOpcode == POP2) 2 else 1
+          queue ++= prods.map(ProducedValue(_, size))
+        }
+      }
+
+      /**
+       * Traverse the method in its initial state and collect all POP instructions and side-effect
+       * free constructor invocations that can be eliminated.
+       */
+      def collectInitialPopsAndPureConstrs(): Unit = {
+        val it = method.instructions.iterator
+        while (it.hasNext) {
+          val insn = it.next()
+          (insn.getOpcode: @switch) match {
+            case POP | POP2 =>
+              handleInitialPop(insn)
+
+            case INVOKESPECIAL =>
+              val mi = insn.asInstanceOf[MethodInsnNode]
+              if (isSideEffectFreeConstructorCall(mi)) sideEffectFreeConstructorCalls += mi
+
+            case _ =>
+          }
+        }
+      }
+
+      /**
+       * Eliminate the `numArgs` inputs of the instruction `prod` (which was eliminated). Fo
+       * each input value
+       *   - if the `prod` instruction is the single consumer, enqueue the producers of the input
+       *   - otherwise, insert a POP instruction to POP the input value
+       */
+      def handleInputs(prod: AbstractInsnNode, numArgs: Int): Unit = {
+        val frame = prodCons.frameAt(prod)
+        val pops = mutable.ListBuffer.empty[InsnNode]
+        @tailrec def handle(stackOffset: Int): Unit = {
+          if (stackOffset >= 0) {
+            val prods = producersIfSingleConsumer(prod, frame.stackTop - stackOffset)
+            val nSize = frame.peekStack(stackOffset).getSize
+            if (prods.isEmpty) pops append getPop(nSize)
+            else queue ++= prods.map(ProducedValue(_, nSize))
+            handle(stackOffset - 1)
+          }
+        }
+        handle(numArgs - 1) // handle stack offsets (numArgs - 1) to 0
+        if (pops.nonEmpty) toInsertBefore(prod) = pops.toList
+      }
+
+      /**
+       * Eliminate the closure value produced by `indy`. If the SAM type is known to construct
+       * without side-effects (e.g. scala/runtime/java8/JFunctionN), the `indy` and its inputs
+       * are eliminated, otherwise a POP is inserted.
+       */
+      def handleClosureInst(indy: InvokeDynamicInsnNode): Unit = {
+        if (isrJFunctionType(Type.getReturnType(indy.desc).getInternalName)) {
+          toRemove += indy
+          callGraph.removeClosureInstantiation(indy, method)
+          handleInputs(indy, Type.getArgumentTypes(indy.desc).length)
+        } else {
+          toInsertAfter(indy) = getPop(1)
+        }
+      }
+
+      def runQueue(): Unit = while (queue.nonEmpty) {
+        val ProducedValue(prod, size) = queue.dequeue()
+
+        def prodString = s"Producer ${AsmUtils textify prod}@${method.instructions.indexOf(prod)}\n${AsmUtils textify method}"
+        def popAfterProd(): Unit = {
+          toInsertAfter(prod) = getPop(size) }
+
+        (prod.getOpcode: @switch) match {
+          case ACONST_NULL | ICONST_M1 | ICONST_0 | ICONST_1 | ICONST_2 | ICONST_3 | ICONST_4 | ICONST_5 | LCONST_0 | LCONST_1 | FCONST_0 | FCONST_1 | FCONST_2 | DCONST_0 | DCONST_1 |
+               BIPUSH | SIPUSH | ILOAD | LLOAD | FLOAD | DLOAD | ALOAD | DUP =>
+            toRemove += prod
+
+          case DUP2 =>
+            assert(size == 2, s"DUP2 for two size-1 values; $prodString") // ensured in method `producerHasSingleOutput`
+            toRemove += prod
+
+          case DUP_X1 | DUP_X2 | DUP2_X1 | DUP2_X2 | SWAP =>
+            // these are excluded in method `producerHasSingleOutput`
+            assert(false, s"Cannot eliminate value pushed by an instruction with multiple output values; $prodString")
+
+          case IDIV | LDIV | IREM | LREM =>
+            popAfterProd() // keep potential division by zero
+
+          case IADD | LADD | FADD | DADD | ISUB | LSUB | FSUB | DSUB | IMUL | LMUL | FMUL | DMUL | FDIV | DDIV | FREM | DREM |
+               LSHL | LSHR | LUSHR |
+               IAND | IOR | IXOR | LAND | LOR | LXOR |
+               LCMP | FCMPL | FCMPG | DCMPL | DCMPG=>
+            toRemove += prod
+            handleInputs(prod, 2)
+
+          case INEG | LNEG | FNEG | DNEG |
+               I2L | I2F | I2D | L2I | L2F | L2D | F2I | F2L | F2D | D2I | D2L | D2F | I2B | I2C | I2S =>
+            toRemove += prod
+            handleInputs(prod, 1)
+
+          case GETFIELD | GETSTATIC =>
+            // TODO eliminate side-effect free module loads (https://github.com/scala/scala-dev/issues/16)
+            if (isBoxedUnit(prod)) toRemove += prod
+            else popAfterProd() // keep potential class initialization (static field) or NPE (instance field)
+
+          case INVOKEVIRTUAL | INVOKESPECIAL | INVOKESTATIC | INVOKEINTERFACE =>
+            val methodInsn = prod.asInstanceOf[MethodInsnNode]
+            if (isSideEffectFreeCall(methodInsn)) {
+              toRemove += prod
+              callGraph.removeCallsite(methodInsn, method)
+              val receiver = if (methodInsn.getOpcode == INVOKESTATIC) 0 else 1
+              handleInputs(prod, Type.getArgumentTypes(methodInsn.desc).length + receiver)
+            } else
+              popAfterProd()
+
+          case INVOKEDYNAMIC =>
+            prod match {
+              case callGraph.LambdaMetaFactoryCall(indy, _, _, _) => handleClosureInst(indy)
+              case _ => popAfterProd()
+            }
+
+          case CHECKCAST =>
+            // Special case for `IndyLambda JFunctionN; CHECKCAST FuncionN`. This sequence is emitted
+            // for all closure instantiations, see https://issues.scala-lang.org/browse/SI-9540
+            val castType = prod.asInstanceOf[TypeInsnNode].desc
+            if (isScalaFunctionType(castType)) {
+
+              // the `FunctionN` part of an internal name (e.g. `scala/runtime/java8/JFunction1$mcDI$sp`)
+              def funPart(funType: String): String = {
+                var end = funType.indexOf('$')
+                if (end == -1) end = funType.length
+                funType.substring(funType.lastIndexOf('/') + 1, end)
+              }
+
+              // true if the cast will always succeed
+              def castOk(indy: InvokeDynamicInsnNode): Boolean = {
+                val generatedFunctionType = Type.getReturnType(indy.desc).getInternalName
+                isrJFunctionType(generatedFunctionType) && funPart(generatedFunctionType) == "J" + funPart(castType)
+              }
+
+              val castProd = producersIfSingleConsumer(prod, prodCons.frameAt(prod).stackTop)
+              if (castProd.size == 1) castProd.head match {
+                case callGraph.LambdaMetaFactoryCall(indy, _, _, _) if castOk(indy) =>
+                  toRemove += prod        // remove the cast
+                  handleClosureInst(indy) // remove the indyLambda
+                case _ => popAfterProd()
+              } else
+                popAfterProd()
+            } else
+              popAfterProd()
+
+          case NEW =>
+            if (isNewForSideEffectFreeConstructor(prod)) toRemove += prod
+            else popAfterProd()
+
+          case LDC =>
+            // don't remove class literals: keep the potential NoClassDefFoundError
+            if (prod.asInstanceOf[LdcInsnNode].cst.isInstanceOf[Type]) popAfterProd()
+            else toRemove += prod
+
+          case NEWARRAY | ANEWARRAY =>
+            toRemove += prod
+            handleInputs(prod, 1)
+
+          case MULTIANEWARRAY =>
+            toRemove += prod
+            handleInputs(prod, prod.asInstanceOf[MultiANewArrayInsnNode].dims)
+
+          case _ =>
+            popAfterProd()
+        }
+      }
+
+      // there are two cases when we can eliminate a constructor call:
+      //   - NEW T; INVOKESPECIAL T.<init> -- there's no DUP, the new object is consumed only by the constructor)
+      //   - NEW T; DUP; INVOKESPECIAL T.<init>, where the DUP will be removed
+      def eliminateUnusedPureConstructorCalls(): Boolean = {
+        var changed = false
+
+        def removeConstructorCall(mi: MethodInsnNode): Unit = {
+          toRemove += mi
+          callGraph.removeCallsite(mi, method)
+          sideEffectFreeConstructorCalls -= mi
+          changed = true
+        }
+
+        for (mi <- sideEffectFreeConstructorCalls.toList) { // toList to allow removing elements while traversing
+          val frame = prodCons.frameAt(mi)
+          val stackTop = frame.stackTop
+          val numArgs = Type.getArgumentTypes(mi.desc).length
+          val receiverProds = producersIfSingleConsumer(mi, stackTop - numArgs)
+          if (receiverProds.size == 1) {
+            val receiverProd = receiverProds.head
+            if (receiverProd.getOpcode == NEW) {
+              removeConstructorCall(mi)
+              handleInputs(mi, numArgs + 1) // removes the producers of args and receiver
+            } else if (receiverProd.getOpcode == DUP && toRemove.contains(receiverProd)) {
+              val dupProds = producersIfSingleConsumer(receiverProd, prodCons.frameAt(receiverProd).stackTop)
+              if (dupProds.size == 1 && dupProds.head.getOpcode == NEW) {
+                removeConstructorCall(mi)
+                handleInputs(mi, numArgs) // removes the producers of args. the producer of the receiver is DUP and already in toRemove.
+                queue += ProducedValue(dupProds.head, 1) // removes the NEW (which is NOT the producer of the receiver!)
+              }
+            }
+          }
+        }
+        changed
+      }
+
+      collectInitialPopsAndPureConstrs()
+
+      // eliminating producers enables eliminating unused constructor calls (when a DUP gets removed).
+      // vice-versa, eliminating a constructor call adds producers of constructor parameters to the queue.
+      // so the two run in a loop.
+      runQueue()
+      while(eliminateUnusedPureConstructorCalls())
+        runQueue()
+
+      var changed = false
+      toInsertAfter foreach {
+        case (target, insn) =>
+          nextExecutableInstructionOrLabel(target) match {
+            // `insn` is of type `InsnNode`, so we only need to check the Opcode when comparing to another instruction
+            case Some(next) if next.getOpcode == insn.getOpcode && toRemove(next) =>
+              // Inserting and removing a POP at the same place should not enable `changed`. This happens
+              // when a POP directly follows a producer that cannot be eliminated, e.g. INVOKESTATIC A.m ()I; POP
+              // The POP is initially added to `toRemove`, and the `INVOKESTATIC` producer is added to the queue.
+              // Because the producer cannot be elided, a POP is added to `toInsertAfter`.
+              toRemove -= next
+
+            case _ =>
+              changed = true
+              method.instructions.insert(target, insn)
+          }
+      }
+      toInsertBefore foreach {
+        case (target, insns) =>
+          changed = true
+          insns.foreach(method.instructions.insertBefore(target, _))
+      }
+      toRemove foreach { insn =>
+        changed = true
+        method.instructions.remove(insn)
+      }
+      changed
+    }
+  }
+
+  case class ProducedValue(producer: AbstractInsnNode, size: Int) {
+    override def toString = s"<${AsmUtils textify producer}>"
+  }
+
+  /**
+   * Remove `xSTORE n; xLOAD n` paris if
+   *   - the local variable n is not used anywhere else in the method (1), and
+   *   - there are no executable instructions and no live labels (jump targets) between the two (2)
+   *
+   * Note: store-load pairs that cannot be eliminated could be replaced by `DUP; xSTORE n`, but
+   * that's just cosmetic and doesn't help for anything.
+   *
+   * (1) This could be made more precise by running a prodCons analysis and checking that the load
+   * is the only user of the store. Then we could eliminate the pair even if the variable is live
+   * (except for ASTORE, SI-5313). Not needing an analyzer is more efficient, and catches most
+   * cases.
+   *
+   * (2) The implementation uses a conservative estimation for liveness (if some instruction uses
+   * local n, then n is considered live in the entire method). In return, it doesn't need to run an
+   * Analyzer on the method, making it more efficient.
+   *
+   * This method also removes `ACONST_NULL; ASTORE n` if the local n is not live. This pattern is
+   * introduced by [[eliminateStaleStores]].
+   *
+   * The implementation is a little tricky to support the following case:
+   *   ISTORE 1; ISTORE 2; ILOAD 2; ACONST_NULL; ASTORE 3; ILOAD 1
+   * The outer store-load pair can be removed if two the inner pairs can be.
+   */
+  def eliminateStoreLoad(method: MethodNode): Boolean = {
+    val removePairs = mutable.Set.empty[RemovePair]
+    val liveVars = new Array[Boolean](method.maxLocals)
+    val liveLabels = mutable.Set.empty[LabelNode]
+
+    def mkRemovePair(store: VarInsnNode, other: AbstractInsnNode, depends: List[RemovePairDependency]): RemovePair = {
+      val r = RemovePair(store, other, depends)
+      removePairs += r
+      r
+    }
+
+    def registerLiveVarsLabels(insn: AbstractInsnNode): Unit = insn match {
+      case vi: VarInsnNode => liveVars(vi.`var`) = true
+      case ii: IincInsnNode => liveVars(ii.`var`) = true
+      case j: JumpInsnNode => liveLabels += j.label
+      case s: TableSwitchInsnNode => liveLabels += s.dflt; liveLabels ++= s.labels.asScala
+      case s: LookupSwitchInsnNode => liveLabels += s.dflt; liveLabels ++= s.labels.asScala
+      case _ =>
+    }
+
+    val pairStartStack = new mutable.Stack[(AbstractInsnNode, mutable.ListBuffer[RemovePairDependency])]
+
+    def push(insn: AbstractInsnNode) = {
+      pairStartStack push ((insn, mutable.ListBuffer.empty))
+    }
+
+    def addDepends(dependency: RemovePairDependency) = if (pairStartStack.nonEmpty) {
+      val (_, depends) = pairStartStack.top
+      depends += dependency
+    }
+
+    def completesStackTop(load: AbstractInsnNode) = isLoad(load) && pairStartStack.nonEmpty && {
+      pairStartStack.top match {
+        case (store: VarInsnNode, _) => store.`var` == load.asInstanceOf[VarInsnNode].`var`
+        case _ => false
+      }
+    }
+
+    /**
+     * Try to pair `insn` with its correspondant on the stack
+     *   - if the stack top is a store and `insn` is a corresponding load, create a pair
+     *   - otherwise, check the two top stack values for `null; store`. if it matches, create
+     *     a pair and continue pairing `insn` on the remaining stack
+     *   - otherwise, empty the stack and mark the local variables in it live
+     */
+    def tryToPairInstruction(insn: AbstractInsnNode): Unit = {
+      @tailrec def emptyStack(): Unit = if (pairStartStack.nonEmpty) {
+        registerLiveVarsLabels(pairStartStack.pop()._1)
+        emptyStack()
+      }
+
+      @tailrec def tryPairing(): Unit = {
+        if (completesStackTop(insn)) {
+          val (store: VarInsnNode, depends) = pairStartStack.pop()
+          addDepends(mkRemovePair(store, insn, depends.toList))
+        } else if (pairStartStack.nonEmpty) {
+          val (top, topDepends) = pairStartStack.pop()
+          if (pairStartStack.nonEmpty) {
+            (pairStartStack.top, top) match {
+              case ((ldNull: InsnNode, depends), store: VarInsnNode) if ldNull.getOpcode == ACONST_NULL && store.getOpcode == ASTORE =>
+                pairStartStack.pop()
+                addDepends(mkRemovePair(store, ldNull, depends.toList))
+                // example: store; (null; store;) (store; load;) load
+                //                         s1^     ^^^^^p1^^^^^        // p1 is added to s1's depends
+                // then:    store; (null; store;) load
+                //           s2^    ^^^^p2^^^^^                        // p1 and p2 are added to s2's depends
+                topDepends foreach addDepends
+                tryPairing()
+
+              case _ =>
+                // empty the stack - a non-matching insn was found, cannot create any pairs to remove
+                registerLiveVarsLabels(insn)
+                registerLiveVarsLabels(top)
+                emptyStack()
+            }
+          } else {
+            // stack only has one element
+            registerLiveVarsLabels(insn)
+            registerLiveVarsLabels(top)
+          }
+        } else {
+          // stack is empty already
+          registerLiveVarsLabels(insn)
+        }
+      }
+
+      tryPairing()
+    }
+
+
+    var insn = method.instructions.getFirst
+
+    @tailrec def advanceToNextExecutableOrLabel(): Unit = {
+      insn = insn.getNext
+      if (insn != null && !isExecutable(insn) && !insn.isInstanceOf[LabelNode]) advanceToNextExecutableOrLabel()
+    }
+
+    while (insn != null) {
+      insn match {
+        case _ if insn.getOpcode == ACONST_NULL          => push(insn)
+        case vi: VarInsnNode if isStore(vi)              => push(insn)
+        case label: LabelNode if pairStartStack.nonEmpty => addDepends(LabelNotLive(label))
+        case _                                           => tryToPairInstruction(insn)
+      }
+      advanceToNextExecutableOrLabel()
+    }
+
+    // elide RemovePairs that depend on live labels or other RemovePair that have to be elided.
+    // example:  store 1; store 2; label x; load 2; load 1
+    // if x is live, the inner pair has to be elided, causing the outer pair to be elided too.
+
+    var doneEliding = false
+
+    def elide(removePair: RemovePair) = {
+      doneEliding = false
+      liveVars(removePair.store.`var`) = true
+      removePairs -= removePair
+    }
+
+    while (!doneEliding) {
+      doneEliding = true
+      for (removePair <- removePairs.toList) {
+        val slot = removePair.store.`var`
+        if (liveVars(slot)) elide(removePair)
+        else removePair.depends foreach {
+          case LabelNotLive(label) => if (liveLabels(label)) elide(removePair)
+          case other: RemovePair => if (!removePairs(other)) elide(removePair)
+        }
+      }
+    }
+
+    for (removePair <- removePairs) {
+      method.instructions.remove(removePair.store)
+      method.instructions.remove(removePair.other)
+    }
+
+    removePairs.nonEmpty
+  }
+}
+
+trait RemovePairDependency
+case class RemovePair(store: VarInsnNode, other: AbstractInsnNode, depends: List[RemovePairDependency]) extends RemovePairDependency {
+  override def toString = s"<${AsmUtils textify store},${AsmUtils textify other}> [$depends]"
 }
+case class LabelNotLive(label: LabelNode) extends RemovePairDependency
 
 object LocalOptImpls {
   /**
@@ -550,7 +1260,7 @@ object LocalOptImpls {
   private def simplifyBranchOverGoto(method: MethodNode, instruction: AbstractInsnNode): Option[JumpInsnNode] = instruction match {
     case ConditionalJump(jump) =>
       // don't skip over labels, see doc comment
-      nextExecutableInstruction(jump, alsoKeep = _.isInstanceOf[LabelNode]) match {
+      nextExecutableInstructionOrLabel(jump) match {
         case Some(Goto(goto)) =>
           if (nextExecutableInstruction(goto, alsoKeep = Set(jump.label)) == Some(jump.label)) {
             val newJump = new JumpInsnNode(negateJumpOpcode(jump.getOpcode), goto.label)
diff --git a/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala b/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala
index a9ec8c7b30..cbd9ee1a55 100644
--- a/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala
+++ b/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala
@@ -230,6 +230,7 @@ trait ScalaSettings extends AbsScalaSettings
     val emptyLineNumbers        = Choice("empty-line-numbers",        "Eliminate unnecessary line number information.")
     val emptyLabels             = Choice("empty-labels",              "Eliminate and collapse redundant labels in the bytecode.")
     val compactLocals           = Choice("compact-locals",            "Eliminate empty slots in the sequence of local variables.")
+    val copyPropagation         = Choice("copy-propagation",          "Eliminate redundant local variables (locals that are copies of others).")
     val nullnessTracking        = Choice("nullness-tracking",         "Track nullness / non-nullness of local variables and apply optimizations.")
     val closureElimination      = Choice("closure-elimination" ,      "Rewrite closure invocations to the implementation method and eliminate closures.")
     val inlineProject           = Choice("inline-project",            "Inline only methods defined in the files being compiled.")
@@ -240,7 +241,7 @@ trait ScalaSettings extends AbsScalaSettings
     private val defaultChoices = List(unreachableCode)
     val lDefault        = Choice("l:default",   "Enable default optimizations: "+ defaultChoices.mkString(","),                                    expandsTo = defaultChoices)
 
-    private val methodChoices = List(unreachableCode, simplifyJumps, emptyLineNumbers, emptyLabels, compactLocals, nullnessTracking, closureElimination)
+    private val methodChoices = List(unreachableCode, simplifyJumps, emptyLineNumbers, emptyLabels, compactLocals, copyPropagation, nullnessTracking, closureElimination)
     val lMethod         = Choice("l:method",    "Enable intra-method optimizations: "+ methodChoices.mkString(","),                                expandsTo = methodChoices)
 
     private val projectChoices = List(lMethod, inlineProject)
@@ -262,6 +263,7 @@ trait ScalaSettings extends AbsScalaSettings
   def YoptEmptyLineNumbers        = Yopt.contains(YoptChoices.emptyLineNumbers)
   def YoptEmptyLabels             = Yopt.contains(YoptChoices.emptyLabels)
   def YoptCompactLocals           = Yopt.contains(YoptChoices.compactLocals)
+  def YoptCopyPropagation         = Yopt.contains(YoptChoices.copyPropagation)
   def YoptNullnessTracking        = Yopt.contains(YoptChoices.nullnessTracking)
   def YoptClosureElimination      = Yopt.contains(YoptChoices.closureElimination)
author	Lukas Rytz <lukas.rytz@gmail.com>	2015-11-06 09:38:21 +0100
committer	Lukas Rytz <lukas.rytz@gmail.com>	2015-11-10 11:55:36 +0100
commit	29d772d0655a3a5af79c3d8eca91bb33a82c7194 (patch)
tree	dce60b333d062b031b5ac57e8a1924c04b4a80c3 /src/compiler
parent	f777790250ffe92b65b51c71c7da113659177ad0 (diff)
download	scala-29d772d0655a3a5af79c3d8eca91bb33a82c7194.tar.gz scala-29d772d0655a3a5af79c3d8eca91bb33a82c7194.tar.bz2 scala-29d772d0655a3a5af79c3d8eca91bb33a82c7194.zip