diff options
Diffstat (limited to 'src/compiler/scala/tools/nsc/backend/jvm/opt')
11 files changed, 3964 insertions, 1372 deletions
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/BoxUnbox.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/BoxUnbox.scala new file mode 100644 index 0000000000..78fc7e1ecf --- /dev/null +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/BoxUnbox.scala @@ -0,0 +1,907 @@ +/* NSC -- new Scala compiler + * Copyright 2005-2014 LAMP/EPFL + * @author Martin Odersky + */ + +package scala.tools.nsc +package backend.jvm +package opt + +import scala.annotation.tailrec +import scala.tools.asm.Type +import scala.tools.asm.Opcodes._ +import scala.tools.asm.tree._ +import scala.collection.mutable +import scala.collection.JavaConverters._ +import scala.tools.nsc.backend.jvm.BTypes.InternalName +import scala.tools.nsc.backend.jvm.opt.BytecodeUtils._ + +class BoxUnbox[BT <: BTypes](val btypes: BT) { + import btypes._ + import backendUtils._ + + /** + * Eliminate box-unbox pairs within `method`. Such appear commonly after closure elimination: + * + * def t2 = { + * val f = (b: Byte, i: Int) => i + b // no specialized variant for this function type + * f(1, 2) // invokes the generic `apply` + * } + * + * The closure optimizer re-writes the `apply` call to `anonfun$adapted` method, which takes + * boxed arguments. After inlining this method, we get + * + * def t2 = { + * val a = boxByte(1) + * val b = boxInteger(2) + * val r = boxInteger(anonfun$(unboxByte(a), unboxInt(b))) + * unboxInt(r) + * } + * + * All these box/unbox operations are eliminated here. + * + * Implementation: for every box operation, find all consumers of the boxed value, then all + * producers of these consumers, repeat until reaching a fixpoint. If this results in a set of + * boxing and unboxing operations, the box can be eliminated. + * + * There are two methods for eliminating boxes: + * M1: If there is a single boxing operation, the boxed value(s) are stored into new local + * variable(s) at the allocation site. Accesses to the boxed value are re-written to reads / + * writes of these locals. Advantages: + * - supports mutable boxes (IntRef and friends) + * - supports eliminating unbox operations even if the box object needs to be created + * because it escapes (see E4) + * - works by keeping the unboxed value(s) in locals AND the box in its original form + * - only for immutable boxes: modifications to the escaped box cannot be applied to + * the local variable(s) holding the boxed value(s). + * Restriction: + * - does not work if there are multiple boxing operations (see E1) + * + * M2: If there are multiple boxing operations, the boxing operations are simply eliminated, + * leaving the unboxed value(s) on the stack. Store / load operations that previously + * acted on the box are adapted to handle the boxed type(s). If the box contains multiple + * values (or a size-2 value, which doesn't fit into locals that were used for the box), + * new local slots are used for store / load operations. Restrictions: + * - does not support re-writing writes to (mutable) boxes (see E2) + * - does not support re-writing reads of boxes that also escape (see E3) + * + * + * E1: M1 only works if there's a single boxing operation. + * def e1(b: Boolean) = { + * val i: Integer = box(10) // 10 is stored into a new local, box operation and i removed + * val j: Integer = box(20) // 20 is stored into a new local, box operation and j removed + * val r = if (b) i else j // loads and stores of the box are eliminated, r no longer exists + * unbox(r) // cannot rewrite: we don't know which local to load + * } + * Note: the example has no write and the box does not escape, so M2 works here. + * + * E2: mutable boxes with multiple boxing operations cannot be eliminated. + * M1: see E1 + * M2: cannot replace an `IntRef` on the stack by an `Int` value on the stack, an Int on the + * stack cannot be modified. + * + * def e2(b: Boolean) = { + * val r1 = new IntRef(0) + * val r2 = new IntRef(1) + * val modRef = if (b) r1 else r2 + * modRef.elem += 10 // M1: cannot rewrite: which local to write? same as E1. + * (if (b) r1 else r2).elem += 10 // M2: cannot change an Int on the stack + * (r1.elem, r2.elem) + * } + * + * + * E3: escaping boxes with multiple boxing operations cannot be rewritten. + * M1: see E1. + * M2: at *, instead of an Integer, an Int is on the stack, but the escape method expects an + * Integer. We cannot just create a box at this point: if there are multiple escapes (or + * an escape is executed more than once), the difference could be observed (reference + * equality). + * + * def e3(b: Boolean) = { + * val i: Integer = box(1) + * val j: Integer = box(2) + * escape(if (b) i else j) // * + * unbox(if (b) i else j) + * } + * + * + * E4: M1 supports rewriting unbox operations of immutable boxes that escape + * def e4 = { + * val i: Integer = box(10) // 10 is stored into a new local, loaded as argument for the box call + * escape(i) // not changed, still loads the local i holding the box + * unbox(i) // rewritten to a pop (of the box) and a load of the local variable + * } + * + * + * E4 seems to be a bit of a corner case, but it's necessary to unblock box eliminations with + * mutual dependencies. Example: + * + * val ((a, b), c) = ((1, 2), 3) + * a + b + c + * + * generates (after a few cleanups) the following (pseudo-bytecode, ignoring primitive boxing, specialization): + * + * load 1, load 2, new Tuple2 // stack: Tuple2 + * load 3 // stack: Tuple2; Int + * val local1 = new Tuple2 + * val local2 = local1._1.asInstanceOf[Tuple2] + * val c = local1._2.asInstanceOf[Int] + * if (local2 == null) throw new MatchError(local1) + * val a = local2._1 + * val b = local2._2 + * a + b + c + * + * In order to eliminate the tuples, we first need to eliminate the outer tuple (stored in local1) + * - single box operation, so we use M1 + * - there are three consumers of the outer tuple: `local1._1`, `local1._2` and + * `new MatchError(local1)`. in the last one, the tuple escapes. + * - note that the MatchError creation is dead code: local2 is never null. However, our nullness + * analysis cannot identify this: it does not track nullness through tuple stores and loads. + * - if we re-write the non-escaping consumers of the outer tuple, but keep the tuple allocation + * and the escaping consumer, we get the following: + * + * load 1, load 2 + * val newLocal1 = new Tuple2; load newLocal1 // stack: Tuple2 + * val newLocal2 = 3; load newLocal2 // stack: Tuple2; Int + * val local1 = new Tuple2 + * val local2 = newLocal1 + * val c = newLocal2 + * if (local2 == null) throw new MatchError(local1) + * val a = local2._1 + * val b = local2._2 + * a + b + c + * + * At this point, the nullness analysis sees that `local2 == null` is false, dead code elimination + * removes the `throw new MatchError(local1)`. After eliminating the allocation of the outer tuple, + * the inner tuple (stored in newLocal1) can also be eliminated. + * + * + * Special case for tuples wrt specialization: a tuple getter may box or unbox the value stored + * in the tuple: calling `_1` on a `Tuple2$mcII$sp` boxes the primitive Int stored in the tuple. + * Similarly, calling `_1$mcI$sp` on a non-specialized `Tuple2` unboxes the Integer in the tuple. + * When eliminating such getters, we have to introduce appropriate box / unbox calls. + * + * + * TODO: add new calls (box / unbox) to the call graph (not urgent) + * TODO: update the call graph because stack heights change (not urgent). + * this may also affect other optimizations, we ignored the issue so far. check how stack + * heights stored in the call graph are used. + * Note: these tasks are not urgent because the call graph is not currently used during / after + * method-local optimizations, only before to perform inlining and closure rewriting. + */ + def boxUnboxElimination(method: MethodNode, owner: InternalName): Boolean = { + AsmAnalyzer.sizeOKForSourceValue(method) && { + val toInsertBefore = mutable.Map.empty[AbstractInsnNode, List[AbstractInsnNode]] + val toReplace = mutable.Map.empty[AbstractInsnNode, List[AbstractInsnNode]] + val toDelete = mutable.Set.empty[AbstractInsnNode] + + val knownHandled = mutable.Set.empty[AbstractInsnNode] + + lazy val prodCons = new ProdConsAnalyzer(method, owner) + + var nextLocal = method.maxLocals + def getLocal(size: Int) = { + val r = nextLocal + nextLocal += size + r + } + + var maxStackGrowth = 0 + + /** Method M1 for eliminating box-unbox pairs (see doc comment in the beginning of this file) */ + def replaceBoxOperationsSingleCreation(creation: BoxCreation, finalCons: Set[BoxConsumer], boxKind: BoxKind, keepBox: Boolean): Unit = { + /** + * If the box is eliminated, all copy operations (loads, stores, others) of the box need to + * be removed. This method returns all copy operations that should be removed. + * + * Returns `None` in case some exotic copy operation is found that cannot be removed + * (DUP2_X1 and friends - these are never emitted by scalac). In this case, the box cannot + * be eliminated. + */ + def copyOpsToEliminate: Option[Set[AbstractInsnNode]] = { + var elidableCopyOps = Set.empty[AbstractInsnNode] + var replaceOK = true + val copyOps = new CopyOpsIterator(Set(creation), finalCons, prodCons) + while (replaceOK && copyOps.hasNext) copyOps.next() match { + case vi: VarInsnNode => + elidableCopyOps += vi + + case copyOp if copyOp.getOpcode == DUP => + elidableCopyOps += copyOp + + case _ => + replaceOK = false + } + if (replaceOK) Some(elidableCopyOps) else None + } + + val canRewrite = keepBox || (copyOpsToEliminate match { + case Some(copyOps) => + toDelete ++= copyOps + true + + case _ => false + }) + + if (canRewrite) { + val localSlots: Vector[(Int, Type)] = boxKind.boxedTypes.map(tp => (getLocal(tp.getSize), tp))(collection.breakOut) + + // store boxed value(s) into localSlots + val storeOps = localSlots.toList reverseMap { case (slot, tp) => + new VarInsnNode(tp.getOpcode(ISTORE), slot) + } + val storeInitialValues = creation.loadInitialValues match { + case Some(ops) => ops ::: storeOps + case None => storeOps + } + if (keepBox) { + val loadOps: List[VarInsnNode] = localSlots.map({ case (slot, tp) => + new VarInsnNode(tp.getOpcode(ILOAD), slot) + })(collection.breakOut) + toInsertBefore(creation.valuesConsumer) = storeInitialValues ::: loadOps + } else { + toReplace(creation.valuesConsumer) = storeInitialValues + toDelete ++= creation.allInsns - creation.valuesConsumer + } + + // rewrite consumers + finalCons foreach { + case write: StaticSetterOrInstanceWrite => + assert(!keepBox, s"cannot eliminate box write if the box remains (and escapes): $write") + val (slot, tp) = localSlots(boxKind.extractedValueIndex(write)) + val storeOp = new VarInsnNode(tp.getOpcode(ISTORE), slot) + toReplace(write.consumer) = List(storeOp) + + case c: EscapingConsumer => + assert(keepBox, s"found escaping consumer, but box is eliminated: $c") + + case extraction => + val (slot, tp) = localSlots(boxKind.extractedValueIndex(extraction)) + val loadOps = new VarInsnNode(tp.getOpcode(ILOAD), slot) :: extraction.postExtractionAdaptationOps(tp) + if (keepBox) toReplace(extraction.consumer) = getPop(1) :: loadOps + else toReplace(extraction.consumer) = loadOps + toDelete ++= extraction.allInsns - extraction.consumer + } + } + } + + /** Method M2 for eliminating box-unbox pairs (see doc comment in the beginning of this file) */ + def replaceBoxOperationsMultipleCreations(allCreations: Set[BoxCreation], allConsumers: Set[BoxConsumer], boxKind: BoxKind): Unit = { + /** + * If a single-value size-1 box is eliminated, local variables slots holding the box are + * reused to hold the unboxed value. In case there's an entry for that local variable in the + * method's local variables table (debug info), adapt the type. + * + * If there are multiple entries for a local variable that's changing types, then all + * entries for that variable are deleted - it's not obvious how to find the correct entry. + * Note that scalac never re-uses local variable slots for non-overlapping locals. Also note + * that all locals that are newly created during the optimizer don't have an entry either. + * + * Finally, note that variables that become unused are removed later from the table by + * removeUnusedLocalVariableNodes in LocalOpt. + * + * Unlike modifications that affect the method's instructions (which uses toReplace etc), + * we can directly modify the local variable table - it does not affect the frames of the + * ProdCons analysis. + */ + def updateLocalVariableTypes(reTypedLocals: Map[Int, Type]): Unit = { + lazy val localsByIndex = method.localVariables.asScala.groupBy(_.index) + for ((index, tp) <- reTypedLocals) localsByIndex.get(index).map(_.toList) match { + case Some(List(local)) => + local.desc = tp.getDescriptor + case Some(locals) => + locals foreach method.localVariables.remove + case _ => + } + } + + /** Remove box creations - leave the boxed value(s) on the stack instead. */ + def replaceCreationOps(): Unit = { + for (creation <- allCreations) creation.loadInitialValues match { + case None => + toDelete ++= creation.allInsns + + case Some(ops) => + toReplace(creation.valuesConsumer) = ops + toDelete ++= (creation.allInsns - creation.valuesConsumer) + } + } + + /** + * Replace a value extraction operation. For a single-value box, the extraction operation can + * just be removed. An extraction from a multi-value box is replaced by POP operations for the + * non-used values, and an xSTORE / xLOAD for the extracted value. Example: tuple3._2 becomes + * POP; xSTORE n; POP; xLOAD n. + */ + def replaceExtractionOps(): Unit = { + if (boxKind.boxedTypes.lengthCompare(1) == 0) { + // fast path for single-value boxes + allConsumers.foreach(extraction => extraction.postExtractionAdaptationOps(boxKind.boxedTypes.head) match { + case Nil => + toDelete ++= extraction.allInsns + case ops => + toReplace(extraction.consumer) = ops + toDelete ++= extraction.allInsns - extraction.consumer + }) + } else { + for (extraction <- allConsumers) { + val valueIndex = boxKind.extractedValueIndex(extraction) + val replacementOps = if (valueIndex == 0) { + val pops = boxKind.boxedTypes.tail.map(t => getPop(t.getSize)) + pops ::: extraction.postExtractionAdaptationOps(boxKind.boxedTypes.head) + } else { + var loadOps: List[AbstractInsnNode] = null + val consumeStack = boxKind.boxedTypes.zipWithIndex reverseMap { + case (tp, i) => + if (i == valueIndex) { + val resultSlot = getLocal(tp.getSize) + loadOps = new VarInsnNode(tp.getOpcode(ILOAD), resultSlot) :: extraction.postExtractionAdaptationOps(tp) + new VarInsnNode(tp.getOpcode(ISTORE), resultSlot) + } else { + getPop(tp.getSize) + } + } + consumeStack ::: loadOps + } + toReplace(extraction.consumer) = replacementOps + toDelete ++= extraction.allInsns - extraction.consumer + } + } + } + + checkCopyOpReplacements(allCreations, allConsumers, boxKind.boxedTypes, nextLocal, prodCons) match { + case Some((replacements, nextCopyOpLocal, reTypedLocals)) => + toReplace ++= replacements + updateLocalVariableTypes(reTypedLocals) + nextLocal = nextCopyOpLocal + replaceCreationOps() + replaceExtractionOps() + // Conservative (safe) value for stack growth. In every frame that initially has a multi-value + // box on the stack, the stack now contains all of the individual values. So for every eliminated + // box, the maxStack may be up to N-1 slots larger. + maxStackGrowth += boxKind.boxedTypes.length - 1 + + case None => + } + } + + val it = method.instructions.iterator + while (it.hasNext) { + val insn = it.next() + if (!knownHandled(insn)) BoxKind.valueCreationKind(insn, prodCons) match { + case Some((boxCreation, boxKind)) => + allCreationsConsumers(boxCreation, boxKind, prodCons) match { + case Some((allCreations, allConsumers)) => + val (escapingConsumers, boxConsumers) = allConsumers.partition(_.isEscaping) + if (boxConsumers.nonEmpty) { + for (c <- allCreations) knownHandled ++= c.allInsns + for (e <- allConsumers) knownHandled ++= e.allInsns + + val hasEscaping = escapingConsumers.nonEmpty + val hasWrite = allConsumers.exists(_.isWrite) + if (!hasEscaping && !hasWrite) { + // M2 -- see doc comment in the beginning of this file + // If both M1 and M2 can be applied, we prefer M2 because it doesn't introduce new locals. + replaceBoxOperationsMultipleCreations(allCreations, allConsumers, boxKind) + } else if (allCreations.size == 1 && (!hasEscaping || !boxKind.isMutable)) { + // M1 -- see doc comment in the beginning of this file + replaceBoxOperationsSingleCreation(allCreations.head, allConsumers, boxKind, keepBox = hasEscaping) + } + } + + case None => + } + + case None => + } + } + + def removeFromCallGraph(insn: AbstractInsnNode): Unit = insn match { + case mi: MethodInsnNode => callGraph.removeCallsite(mi, method) + case _ => + } + + for ((location, ops) <- toInsertBefore; op <- ops) + method.instructions.insertBefore(location, op) + + for ((oldOp, newOps) <- toReplace) { + for (newOp <- newOps) method.instructions.insertBefore(oldOp, newOp) + method.instructions.remove(oldOp) + removeFromCallGraph(oldOp) + } + + for (op <- toDelete) { + method.instructions.remove(op) + removeFromCallGraph(op) + } + + method.maxLocals = nextLocal + method.maxStack += maxStackGrowth + toInsertBefore.nonEmpty || toReplace.nonEmpty || toDelete.nonEmpty + } + } + + /** + * Given a box creations operation + * - find all ultimate consumers for the produced value. then: + * - for all consumed values, find all producer operations. check that all are box creations + * - recurse until reaching a fixpoint + * + * Returns a set of box creations and a set of box consumers. Note that the box consumers may + * contain [[EscapingConsumer]]s, even if there are multiple box creation operations. The callee + * will handle this case (and not attempt to eliminate the box). + */ + def allCreationsConsumers(initialCreation: BoxCreation, boxKind: BoxKind, prodCons: ProdConsAnalyzer): Option[(Set[BoxCreation], Set[BoxConsumer])] = { + var creations = Set(initialCreation) + var consumers = Set.empty[BoxConsumer] + + def addCreations(boxConsumer: BoxConsumer): Boolean = { + val newProds = boxConsumer.boxProducers(prodCons).filterNot(prod => creations.exists(_.producer == prod)) + newProds.forall(prod => boxKind.checkBoxCreation(prod, prodCons) match { + case Some(boxCreation) => + creations += boxCreation + addBoxConsumers(boxCreation) + + case _ => false + }) + } + + def addBoxConsumers(creation: BoxCreation): Boolean = { + val newCons = creation.boxConsumers(prodCons, ultimate = true).filterNot(cons => consumers.exists(_.consumer == cons)) + newCons.forall(cons => boxKind.checkBoxConsumer(cons, prodCons) match { + case Some(boxConsumer) => + consumers += boxConsumer + addCreations(boxConsumer) + + case _ => + creations.size <= 1 && { + // If there's a single box creation, the box operations can still be rewritten + consumers += EscapingConsumer(cons) + true + } + }) + } + + if (addBoxConsumers(initialCreation)) Some((creations, consumers)) + else None + } + + /** + * Takes two sets `initialProds` and `finalCons` such that all boxes produced by the first set + * are only consumed by an operation in the second set. + * + * Returns a map that replaces copy operations (ALOAD / ASTORE) between the producers and + * consumers with corresponding copy operations for the values stored in the box. The returned + * `Int` value returns the next free local variable slot. + * + * Examples: + * - for an Integer box, an ASTORE x is simply replaced by ISTORE x + * - for a pair of two references, an ASTORE x is replaced by `ASTORE x1; ASTORE x2` where x1 + * and x2 are fresh locals + * + * Not all copy operations can be supported: DUP only works for single-value boxes, the more + * exotic copy operations (DUP2_X2) are not supported (note that Scalac never emits them). If a + * copy operation cannot be replaced, this method returns `None`. + */ + def checkCopyOpReplacements(initialProds: Set[BoxCreation], finalCons: Set[BoxConsumer], valueTypes: List[Type], nextLocal: Int, prodCons: ProdConsAnalyzer): Option[(Map[AbstractInsnNode, List[AbstractInsnNode]], Int, Map[Int, Type])] = { + var replacements = Map.empty[AbstractInsnNode, List[AbstractInsnNode]] + var reTypedLocals = Map.empty[Int, Type] + + var nextCopyOpLocal = nextLocal + val newLocalsMap: mutable.LongMap[List[(Type, Int)]] = mutable.LongMap.empty + def newLocals(index: Int) = newLocalsMap.getOrElseUpdate(index, valueTypes match { + case List(t) if t.getSize == 1 => + reTypedLocals += index -> t + List((t, index)) + case _ => valueTypes.map(t => { + val newIndex = nextCopyOpLocal + nextCopyOpLocal += t.getSize + (t, newIndex) + }) + }) + + var replaceOK = true + val copyOps = new CopyOpsIterator(initialProds, finalCons, prodCons) + while (replaceOK && copyOps.hasNext) copyOps.next() match { + case vi: VarInsnNode => + val isLoad = vi.getOpcode == ALOAD + val typedVarOp = (tp: (Type, Int)) => { + val opc = tp._1.getOpcode(if (isLoad) ILOAD else ISTORE) + new VarInsnNode(opc, tp._2) + } + val locs = newLocals(vi.`var`) + replacements += vi -> (if (isLoad) locs.map(typedVarOp) else locs.reverseMap(typedVarOp)) + + case copyOp => + if (copyOp.getOpcode == DUP && valueTypes.lengthCompare(1) == 0) { + if (valueTypes.head.getSize == 2) + replacements += copyOp -> List(new InsnNode(DUP2)) + } else { + replaceOK = false + } + } + if (replaceOK) Some((replacements, nextCopyOpLocal, reTypedLocals)) else None + } + + /** + * For a set of box creation operations and a corresponding set of box consumer operations, + * this iterator returns all copy operations (load, store, dup) that are in between. + */ + class CopyOpsIterator(initialCreations: Set[BoxCreation], finalCons: Set[BoxConsumer], prodCons: ProdConsAnalyzer) extends Iterator[AbstractInsnNode] { + private var queue = mutable.Queue.empty[AbstractInsnNode] ++ initialCreations.iterator.flatMap(_.boxConsumers(prodCons, ultimate = false)) + + // a single copy operation can consume multiple producers: val a = if (b) box(1) else box(2). + // the `ASTORE a` has two producers (the two box operations). we need to handle it only once. + private val visited = mutable.Set.empty[AbstractInsnNode] + + private val boxConsumingOps = finalCons.map(_.consumer) + + @tailrec private def advanceToNextCopyOp(): Unit = { + if (queue.nonEmpty) { + val h = queue.front + if (visited(h) || boxConsumingOps(h)) { + queue.dequeue() + advanceToNextCopyOp() + } + } + } + + def hasNext: Boolean = { + advanceToNextCopyOp() + queue.nonEmpty + } + + def next(): AbstractInsnNode = { + advanceToNextCopyOp() + val r = queue.dequeue() + visited += r + queue ++= prodCons.consumersOfOutputsFrom(r) + r + } + } + + trait BoxKind { + def checkBoxCreation(insn: AbstractInsnNode, prodCons: ProdConsAnalyzer): Option[BoxCreation] + def checkBoxConsumer(insn: AbstractInsnNode, prodCons: ProdConsAnalyzer): Option[BoxConsumer] + def boxedTypes: List[Type] + def extractedValueIndex(extraction: BoxConsumer): Int + def isMutable: Boolean + } + + object BoxKind { + def valueCreationKind(insn: AbstractInsnNode, prodCons: ProdConsAnalyzer): Option[(BoxCreation, BoxKind)] = { + PrimitiveBox.checkPrimitiveBox(insn, None, prodCons) orElse + Ref.checkRefCreation(insn, None, prodCons) orElse + Tuple.checkTupleCreation(insn, None, prodCons) + } + + /** + * Check if `newOp` is part of a standard object construction pattern in which: + * + * NEW T + * DUP + * [load constructor args] + * INVOKESPECIAL T.init + * + * The method ensures that the entire construction pattern is closed in itself, without any + * branches going in or out. This is checked by looking at producers / consumers: + * - `DUP` is the only consumer of `NEW`, and vice versa + * - `DUP` the only producer for the receiver of the constructor call + * - The set of consumers of `DUP` without the constructor call is the same as + * the set of consumers of the value on the stack top after the constructor call + */ + def checkInstanceCreation(newOp: TypeInsnNode, prodCons: ProdConsAnalyzer): Option[(InsnNode, MethodInsnNode)] = { + val newCons = prodCons.consumersOfOutputsFrom(newOp) + if (newCons.size == 1 && newCons.head.getOpcode == DUP) { + val dupOp = newCons.head.asInstanceOf[InsnNode] + if (prodCons.producersForInputsOf(dupOp) == Set(newOp)) { + val dupCons = prodCons.consumersOfOutputsFrom(dupOp) + val initCalls = dupCons collect { + case mi: MethodInsnNode if mi.name == GenBCode.INSTANCE_CONSTRUCTOR_NAME && mi.owner == newOp.desc => mi + } + if (initCalls.size == 1) { + val initCall = initCalls.head + val numArgs = Type.getArgumentTypes(initCall.desc).length + val receiverProds = prodCons.producersForValueAt(initCall, prodCons.frameAt(initCall).stackTop - numArgs) + if (receiverProds == Set(dupOp)) { + val dupConsWithoutInit = dupCons - initCall + val afterInit = initCall.getNext + val stackTopAfterInit = prodCons.frameAt(afterInit).stackTop + val initializedInstanceCons = prodCons.consumersOfValueAt(afterInit, stackTopAfterInit) + if (initializedInstanceCons == dupConsWithoutInit && prodCons.producersForValueAt(afterInit, stackTopAfterInit) == Set(dupOp)) { + return Some((dupOp, initCall)) + } + } + } + } + } + None + } + + /** + * If `mi` is an invocation of a method on Predef, check if the receiver is a GETSTATIC of + * Predef.MODULE$ and return it. + */ + def checkReceiverPredefLoad(mi: MethodInsnNode, prodCons: ProdConsAnalyzer): Option[AbstractInsnNode] = { + val numArgs = Type.getArgumentTypes(mi.desc).length + val receiverProds = prodCons.producersForValueAt(mi, prodCons.frameAt(mi).stackTop - numArgs) + if (receiverProds.size == 1) { + val prod = receiverProds.head + if (isPredefLoad(prod) && prodCons.consumersOfOutputsFrom(prod) == Set(mi)) return Some(prod) + } + None + } + } + + case class PrimitiveBox(boxedType: Type, boxClass: InternalName) extends BoxKind { + import PrimitiveBox._ + def checkBoxCreation(insn: AbstractInsnNode, prodCons: ProdConsAnalyzer): Option[BoxCreation] = checkPrimitiveBox(insn, Some(this), prodCons).map(_._1) + def checkBoxConsumer(insn: AbstractInsnNode, prodCons: ProdConsAnalyzer): Option[BoxConsumer] = checkPrimitiveUnbox(insn, this, prodCons) + def boxedTypes: List[Type] = List(boxedType) + def extractedValueIndex(extraction: BoxConsumer): Int = 0 + def isMutable = false + } + + object PrimitiveBox { + private def boxedType(mi: MethodInsnNode) = Type.getArgumentTypes(mi.desc)(0) + + private def boxClass(mi: MethodInsnNode) = { + if (mi.name == GenBCode.INSTANCE_CONSTRUCTOR_NAME) mi.owner + else Type.getReturnType(mi.desc).getInternalName + } + + def checkPrimitiveBox(insn: AbstractInsnNode, expectedKind: Option[PrimitiveBox], prodCons: ProdConsAnalyzer): Option[(BoxCreation, PrimitiveBox)] = { + // mi is either a box factory or a box constructor invocation + def checkKind(mi: MethodInsnNode) = expectedKind match { + case Some(kind) => if (kind.boxClass == boxClass(mi)) expectedKind else None + case None => Some(PrimitiveBox(boxedType(mi), boxClass(mi))) + } + + insn match { + case mi: MethodInsnNode => + if (isScalaBox(mi) || isJavaBox(mi)) checkKind(mi).map((StaticFactory(mi, loadInitialValues = None), _)) + else if (isPredefAutoBox(mi)) + for (predefLoad <- BoxKind.checkReceiverPredefLoad(mi, prodCons); kind <- checkKind(mi)) + yield (ModuleFactory(predefLoad, mi), kind) + else None + + case ti: TypeInsnNode if ti.getOpcode == NEW => + for ((dupOp, initCall) <- BoxKind.checkInstanceCreation(ti, prodCons) if isPrimitiveBoxConstructor(initCall); kind <- checkKind(initCall)) + yield (InstanceCreation(ti, dupOp, initCall), kind) + + case _ => None + } + } + + def checkPrimitiveUnbox(insn: AbstractInsnNode, kind: PrimitiveBox, prodCons: ProdConsAnalyzer): Option[BoxConsumer] = { + def typeOK(mi: MethodInsnNode) = kind.boxedType == Type.getReturnType(mi.desc) + insn match { + case mi: MethodInsnNode => + if ((isScalaUnbox(mi) || isJavaUnbox(mi)) && typeOK(mi)) Some(StaticGetterOrInstanceRead(mi)) + else if (isPredefAutoUnbox(mi) && typeOK(mi)) BoxKind.checkReceiverPredefLoad(mi, prodCons).map(ModuleGetter(_, mi)) + else None + + case _ => None + } + } + } + + case class Ref(boxedType: Type, refClass: InternalName) extends BoxKind { + import Ref._ + def checkBoxCreation(insn: AbstractInsnNode, prodCons: ProdConsAnalyzer): Option[BoxCreation] = checkRefCreation(insn, Some(this), prodCons).map(_._1) + def checkBoxConsumer(insn: AbstractInsnNode, prodCons: ProdConsAnalyzer): Option[BoxConsumer] = checkRefConsumer(insn, this, prodCons) + def boxedTypes: List[Type] = List(boxedType) + def extractedValueIndex(extraction: BoxConsumer): Int = 0 + def isMutable = true + } + + object Ref { + private def boxedType(mi: MethodInsnNode): Type = runtimeRefClassBoxedType(mi.owner) + private def refClass(mi: MethodInsnNode): InternalName = mi.owner + private def loadZeroValue(refZeroCall: MethodInsnNode): List[AbstractInsnNode] = List(loadZeroForTypeSort(runtimeRefClassBoxedType(refZeroCall.owner).getSort)) + + def checkRefCreation(insn: AbstractInsnNode, expectedKind: Option[Ref], prodCons: ProdConsAnalyzer): Option[(BoxCreation, Ref)] = { + def checkKind(mi: MethodInsnNode): Option[Ref] = expectedKind match { + case Some(kind) => if (kind.refClass == refClass(mi)) expectedKind else None + case None => Some(Ref(boxedType(mi), refClass(mi))) + } + + insn match { + case mi: MethodInsnNode => + if (isRefCreate(mi)) checkKind(mi).map((StaticFactory(mi, loadInitialValues = None), _)) + else if (isRefZero(mi)) checkKind(mi).map((StaticFactory(mi, loadInitialValues = Some(loadZeroValue(mi))), _)) + else None + + case ti: TypeInsnNode if ti.getOpcode == NEW => + for ((dupOp, initCall) <- BoxKind.checkInstanceCreation(ti, prodCons) if isRuntimeRefConstructor(initCall); kind <- checkKind(initCall)) + yield (InstanceCreation(ti, dupOp, initCall), kind) + + case _ => None + } + } + + def checkRefConsumer(insn: AbstractInsnNode, kind: Ref, prodCons: ProdConsAnalyzer): Option[BoxConsumer] = insn match { + case fi: FieldInsnNode if fi.owner == kind.refClass && fi.name == "elem" => + if (fi.getOpcode == GETFIELD) Some(StaticGetterOrInstanceRead(fi)) + else if (fi.getOpcode == PUTFIELD) Some(StaticSetterOrInstanceWrite(fi)) + else None + + case _ => None + } + } + + case class Tuple(boxedTypes: List[Type], tupleClass: InternalName) extends BoxKind { + import Tuple._ + def checkBoxCreation(insn: AbstractInsnNode, prodCons: ProdConsAnalyzer): Option[BoxCreation] = checkTupleCreation(insn, Some(this), prodCons).map(_._1) + def checkBoxConsumer(insn: AbstractInsnNode, prodCons: ProdConsAnalyzer): Option[BoxConsumer] = checkTupleExtraction(insn, this, prodCons) + def extractedValueIndex(extraction: BoxConsumer): Int = extraction match { + case StaticGetterOrInstanceRead(mi: MethodInsnNode) => tupleGetterIndex(mi.name) + case PrimitiveBoxingGetter(mi) => tupleGetterIndex(mi.name) + case PrimitiveUnboxingGetter(mi, _) => tupleGetterIndex(mi.name) + case _ => throw new AssertionError(s"Expected tuple getter, found $extraction") + } + def isMutable = false + } + + object Tuple { + private def boxedTypes(mi: MethodInsnNode): List[Type] = Type.getArgumentTypes(mi.desc).toList + private def tupleClass(mi: MethodInsnNode): InternalName = mi.owner + + def checkTupleCreation(insn: AbstractInsnNode, expectedKind: Option[Tuple], prodCons: ProdConsAnalyzer): Option[(BoxCreation, Tuple)] = { + def checkKind(mi: MethodInsnNode): Option[Tuple] = expectedKind match { + case Some(kind) => if (kind.tupleClass == tupleClass(mi)) expectedKind else None + case None => Some(Tuple(boxedTypes(mi), tupleClass(mi))) + } + + insn match { + // no need to check for TupleN.apply: the compiler transforms case companion apply calls to constructor invocations + case ti: TypeInsnNode if ti.getOpcode == NEW => + for ((dupOp, initCall) <- BoxKind.checkInstanceCreation(ti, prodCons) if isTupleConstructor(initCall); kind <- checkKind(initCall)) + yield (InstanceCreation(ti, dupOp, initCall), kind) + + case _ => None + } + } + + private val specializedTupleClassR = "scala/Tuple[12]\\$mc[IJDCZ]{1,2}\\$sp".r + private def isSpecializedTupleClass(tupleClass: InternalName) = specializedTupleClassR.pattern.matcher(tupleClass).matches + + private val specializedTupleGetterR = "_[12]\\$mc[IJDCZ]\\$sp".r + private def isSpecializedTupleGetter(mi: MethodInsnNode) = specializedTupleGetterR.pattern.matcher(mi.name).matches + + private val tupleGetterR = "_\\d\\d?".r + private def isTupleGetter(mi: MethodInsnNode) = tupleGetterR.pattern.matcher(mi.name).matches + + def checkTupleExtraction(insn: AbstractInsnNode, kind: Tuple, prodCons: ProdConsAnalyzer): Option[BoxConsumer] = { + val expectedTupleClass = kind.tupleClass + insn match { + case mi: MethodInsnNode => + val tupleClass = mi.owner + if (isSpecializedTupleClass(expectedTupleClass)) { + val typeOK = tupleClass == expectedTupleClass || tupleClass == expectedTupleClass.substring(0, expectedTupleClass.indexOf('$')) + if (typeOK) { + if (isSpecializedTupleGetter(mi)) return Some(StaticGetterOrInstanceRead(mi)) + else if (isTupleGetter(mi)) return Some(PrimitiveBoxingGetter(mi)) + } + } else if (expectedTupleClass == tupleClass) { + if (isSpecializedTupleGetter(mi)) return Some(PrimitiveUnboxingGetter(mi, Type.getReturnType(mi.desc))) + else if (isTupleGetter(mi)) return Some(StaticGetterOrInstanceRead(mi)) + } + + case _ => + } + None + } + + private val getterIndexPattern = "_(\\d{1,2}).*".r + def tupleGetterIndex(getterName: String) = getterName match { case getterIndexPattern(i) => i.toInt - 1 } + } + + // TODO: add more + // case class ValueClass(valueClass: Type, valueType: Type) extends BoxKind + + sealed trait BoxCreation { + // to support box creation operations that don't consume an initial value from the stack, e.g., IntRef.zero + val loadInitialValues: Option[List[AbstractInsnNode]] + + /** + * The instruction that produces the box value; for instance creations, the `NEW` operation. + */ + def producer: AbstractInsnNode + + /** + * The instruction that consumes the boxed values; for instance creations, the `init` call. + */ + def valuesConsumer: MethodInsnNode = this match { + case StaticFactory(call, _) => call + case ModuleFactory(_, call) => call + case InstanceCreation(_, _, initCall) => initCall + } + + def allInsns: Set[AbstractInsnNode] = this match { + case StaticFactory(c, _) => Set(c) + case ModuleFactory(m, c) => Set(m, c) + case InstanceCreation(n, d, i) => Set(n, d, i) + } + + /** + * The consumers of the box produced by this box creation. If `ultimate` is true, then the + * final consumers are returned (e.g., an unbox operation), otherwise direct consumers (e.g., + * a store operation). + */ + def boxConsumers(prodCons: ProdConsAnalyzer, ultimate: Boolean): Set[AbstractInsnNode] = { + val startInsn = this match { + // for the non-transitive case (ultimate == false), it's important to start at the `dupOp`, + // not the `newOp` - look at the BoxCreation as a black box, get its consumers. + case InstanceCreation(_, dupOp, _) => dupOp + case _ => producer + } + val cons = if (ultimate) prodCons.ultimateConsumersOfOutputsFrom(startInsn) else prodCons.consumersOfOutputsFrom(startInsn) + this match { + case InstanceCreation(_, _, initCall) => cons - initCall + case _ => cons + } + } + } + + case class StaticFactory(producer: MethodInsnNode, loadInitialValues: Option[List[AbstractInsnNode]]) extends BoxCreation + case class ModuleFactory(moduleLoad: AbstractInsnNode, producer: MethodInsnNode) extends BoxCreation { + val loadInitialValues: Option[List[AbstractInsnNode]] = None + } + case class InstanceCreation(newOp: TypeInsnNode, dupOp: InsnNode, initCall: MethodInsnNode) extends BoxCreation { + def producer = newOp + val loadInitialValues: Option[List[AbstractInsnNode]] = None + } + + sealed trait BoxConsumer { + val consumer: AbstractInsnNode + + def allInsns: Set[AbstractInsnNode] = this match { + case ModuleGetter(m, c) => Set(m, c) + case _ => Set(consumer) + } + + /** + * The initial producers of the box value consumed by this box consumer + */ + def boxProducers(prodCons: ProdConsAnalyzer): Set[AbstractInsnNode] = { + val stackTop = prodCons.frameAt(consumer).stackTop + val slot = if (isWrite) stackTop - 1 else stackTop + prodCons.initialProducersForValueAt(consumer, slot) + } + + def isEscaping = this match { + case _: EscapingConsumer => true + case _ => false + } + + def isWrite = this match { + case _: StaticSetterOrInstanceWrite => true + case _ => false + } + + /** + * If this box consumer extracts a boxed value and applies a conversion, this method returns + * equivalent conversion operations. For example, invoking `_1$mcI$sp` on a non-specialized + * `Tuple2` extracts the Integer value and unboxes it. + */ + def postExtractionAdaptationOps(typeOfExtractedValue: Type): List[AbstractInsnNode] = this match { + case PrimitiveBoxingGetter(_) => List(getScalaBox(typeOfExtractedValue)) + case PrimitiveUnboxingGetter(_, unboxedPrimitive) => List(getScalaUnbox(unboxedPrimitive)) + case _ => Nil + } + } + + /** Static extractor (BoxesRunTime.unboxToInt) or GETFIELD or getter invocation */ + case class StaticGetterOrInstanceRead(consumer: AbstractInsnNode) extends BoxConsumer + /** A getter that boxes the returned value, e.g., `Tuple2$mcII$sp._1` */ + case class PrimitiveBoxingGetter(consumer: MethodInsnNode) extends BoxConsumer + /** A getter that unboxes the returned value, e.g., `Tuple2._1$mcI$sp` */ + case class PrimitiveUnboxingGetter(consumer: MethodInsnNode, unboxedPrimitive: Type) extends BoxConsumer + /** An extractor method in a Scala module, e.g., `Predef.Integer2int` */ + case class ModuleGetter(moduleLoad: AbstractInsnNode, consumer: MethodInsnNode) extends BoxConsumer + /** PUTFIELD or setter invocation */ + case class StaticSetterOrInstanceWrite(consumer: AbstractInsnNode) extends BoxConsumer + /** An unknown box consumer */ + case class EscapingConsumer(consumer: AbstractInsnNode) extends BoxConsumer +} diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/ByteCodeRepository.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/ByteCodeRepository.scala index a5b85e54e7..f2ff73c44d 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/ByteCodeRepository.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/ByteCodeRepository.scala @@ -9,13 +9,12 @@ package opt import scala.tools.asm import asm.tree._ -import scala.collection.convert.decorateAsScala._ +import scala.collection.JavaConverters._ +import scala.collection.{concurrent, mutable} import scala.tools.asm.Attribute import scala.tools.nsc.backend.jvm.BackendReporting._ -import scala.tools.nsc.io.AbstractFile -import scala.tools.nsc.util.ClassFileLookup +import scala.tools.nsc.util.ClassPath import BytecodeUtils._ -import ByteCodeRepository._ import BTypes.InternalName import java.util.concurrent.atomic.AtomicLong @@ -24,58 +23,91 @@ import java.util.concurrent.atomic.AtomicLong * classpath. Parsed classes are cached in the `classes` map. * * @param classPath The compiler classpath where classfiles are searched and read from. - * @param classes Cache for parsed ClassNodes. Also stores the source of the bytecode: - * [[Classfile]] if read from `classPath`, [[CompilationUnit]] if the bytecode - * corresponds to a class being compiled. - * The `Long` field encodes the age of the node in the map, which allows removing - * old entries when the map grows too large. - * For Java classes in mixed compilation, the map contains an error message: no - * ClassNode is generated by the backend and also no classfile that could be parsed. */ -class ByteCodeRepository(val classPath: ClassFileLookup[AbstractFile], val isJavaSourceDefined: InternalName => Boolean, val classes: collection.concurrent.Map[InternalName, Either[ClassNotFound, (ClassNode, Source, Long)]]) { +class ByteCodeRepository[BT <: BTypes](val classPath: ClassPath, val btypes: BT) { + import btypes._ + + /** + * Contains ClassNodes and the canonical path of the source file path of classes being compiled in + * the current compilation run. + */ + val compilingClasses: concurrent.Map[InternalName, (ClassNode, String)] = recordPerRunCache(concurrent.TrieMap.empty) + + /** + * Cache for parsed ClassNodes. + * The `Long` field encodes the age of the node in the map, which allows removing old entries when + * the map grows too large (see limitCacheSize). + * For Java classes in mixed compilation, the map contains an error message: no ClassNode is + * generated by the backend and also no classfile that could be parsed. + */ + val parsedClasses: concurrent.Map[InternalName, Either[ClassNotFound, (ClassNode, Long)]] = recordPerRunCache(concurrent.TrieMap.empty) private val maxCacheSize = 1500 private val targetSize = 500 - private val idCounter = new AtomicLong(0) + private object lruCounter extends AtomicLong(0l) with collection.generic.Clearable { + def clear(): Unit = { this.set(0l) } + } + recordPerRunCache(lruCounter) /** * Prevent the code repository from growing too large. Profiling reveals that the average size * of a ClassNode is about 30 kb. I observed having 17k+ classes in the cache, i.e., 500 mb. - * - * We can only remove classes with `Source == Classfile`, those can be parsed again if requested. */ private def limitCacheSize(): Unit = { - if (classes.count(c => c._2.isRight && c._2.right.get._2 == Classfile) > maxCacheSize) { - val removeId = idCounter.get - targetSize - val toRemove = classes.iterator.collect({ - case (name, Right((_, Classfile, id))) if id < removeId => name - }).toList - toRemove foreach classes.remove + if (parsedClasses.size > maxCacheSize) { + // OK if multiple threads get here + val minimalLRU = parsedClasses.valuesIterator.collect({ + case Right((_, lru)) => lru + }).toList.sorted(Ordering.Long.reverse).drop(targetSize).headOption.getOrElse(Long.MaxValue) + parsedClasses retain { + case (_, Right((_, lru))) => lru > minimalLRU + case _ => false + } } } - def add(classNode: ClassNode, source: Source) = { - classes(classNode.name) = Right((classNode, source, idCounter.incrementAndGet())) + def add(classNode: ClassNode, sourceFilePath: Option[String]) = sourceFilePath match { + case Some(path) if path != "<no file>" => compilingClasses(classNode.name) = (classNode, path) + case _ => parsedClasses(classNode.name) = Right((classNode, lruCounter.incrementAndGet())) + } + + private def parsedClassNode(internalName: InternalName): Either[ClassNotFound, ClassNode] = { + val r = parsedClasses.get(internalName) match { + case Some(l @ Left(_)) => l + case Some(r @ Right((classNode, _))) => + parsedClasses(internalName) = Right((classNode, lruCounter.incrementAndGet())) + r + case None => + limitCacheSize() + val res = parseClass(internalName).map((_, lruCounter.incrementAndGet())) + parsedClasses(internalName) = res + res + } + r.map(_._1) } /** - * The class node and source for an internal name. If the class node is not yet available, it is - * parsed from the classfile on the compile classpath. + * The class node and source file path (if the class is being compiled) for an internal name. If + * the class node is not yet available, it is parsed from the classfile on the compile classpath. */ - def classNodeAndSource(internalName: InternalName): Either[ClassNotFound, (ClassNode, Source)] = { - val r = classes.getOrElseUpdate(internalName, { - limitCacheSize() - parseClass(internalName).map((_, Classfile, idCounter.incrementAndGet())) - }) - r.map(v => (v._1, v._2)) + def classNodeAndSourceFilePath(internalName: InternalName): Either[ClassNotFound, (ClassNode, Option[String])] = { + compilingClasses.get(internalName) match { + case Some((c, p)) => Right((c, Some(p))) + case _ => parsedClassNode(internalName).map((_, None)) + } } /** * The class node for an internal name. If the class node is not yet available, it is parsed from * the classfile on the compile classpath. */ - def classNode(internalName: InternalName): Either[ClassNotFound, ClassNode] = classNodeAndSource(internalName).map(_._1) + def classNode(internalName: InternalName): Either[ClassNotFound, ClassNode] = { + compilingClasses.get(internalName) match { + case Some((c, _)) => Right(c) + case None => parsedClassNode(internalName) + } + } /** * The field node for a field matching `name` and `descriptor`, accessed in class `classInternalName`. @@ -86,7 +118,6 @@ class ByteCodeRepository(val classPath: ClassFileLookup[AbstractFile], val isJav */ def fieldNode(classInternalName: InternalName, name: String, descriptor: String): Either[FieldNotFound, (FieldNode, InternalName)] = { def fieldNodeImpl(parent: InternalName): Either[FieldNotFound, (FieldNode, InternalName)] = { - def msg = s"The field node $name$descriptor could not be found in class $classInternalName or any of its superclasses." classNode(parent) match { case Left(e) => Left(FieldNotFound(name, descriptor, classInternalName, Some(e))) case Right(c) => @@ -105,33 +136,135 @@ class ByteCodeRepository(val classPath: ClassFileLookup[AbstractFile], val isJav * The method node for a method matching `name` and `descriptor`, accessed in class `ownerInternalNameOrArrayDescriptor`. * The declaration of the method may be in one of the parents. * + * Note that the JVM spec performs method lookup in two steps: resolution and selection. + * + * Method resolution, defined in jvms-5.4.3.3 and jvms-5.4.3.4, is the first step and is identical + * for all invocation styles (virtual, interface, special, static). If C is the receiver class + * in the invocation instruction: + * 1 find a matching method (name and descriptor) in C + * 2 then in C's superclasses + * 3 then find the maximally-specific matching superinterface methods, succeed if there's a + * single non-abstract one. static and private methods in superinterfaces are not considered. + * 4 then pick a random non-static, non-private superinterface method. + * 5 then fail. + * + * Note that for an `invokestatic` instruction, a method reference `B.m` may resolve to `A.m`, if + * class `B` doesn't specify a matching method `m`, but the parent `A` does. + * + * Selection depends on the invocation style and is defined in jvms-6.5. + * - invokestatic: invokes the resolved method + * - invokevirtual / invokeinterface: searches for an override of the resolved method starting + * at the dynamic receiver type. the search procedure is basically the same as in resolution, + * but it fails at 4 instead of picking a superinterface method at random. + * - invokespecial: if C is the receiver in the invocation instruction, searches for an override + * of the resolved method starting at + * - the superclass of the current class, if C is a superclass of the current class + * - C otherwise + * again, the search procedure is the same. + * + * In the method here we implement method *resolution*. Whether or not the returned method is + * actually invoked at runtime depends on the invocation instruction and the class hierarchy, so + * the users (e.g. the inliner) have to be aware of method selection. + * + * Note that the returned method may be abstract (ACC_ABSTRACT), native (ACC_NATIVE) or signature + * polymorphic (methods `invoke` and `invokeExact` in class `MethodHandles`). + * * @return The [[MethodNode]] of the requested method and the [[InternalName]] of its declaring - * class, or an error message if the method could not be found. + * class, or an error message if the method could not be found. An error message is also + * returned if method resolution results in multiple default methods. */ def methodNode(ownerInternalNameOrArrayDescriptor: String, name: String, descriptor: String): Either[MethodNotFound, (MethodNode, InternalName)] = { - // on failure, returns a list of class names that could not be found on the classpath - def methodNodeImpl(ownerInternalName: InternalName): Either[List[ClassNotFound], (MethodNode, InternalName)] = { - classNode(ownerInternalName) match { - case Left(e) => Left(List(e)) - case Right(c) => - c.methods.asScala.find(m => m.name == name && m.desc == descriptor) match { - case Some(m) => Right((m, ownerInternalName)) - case None => findInParents(Option(c.superName) ++: c.interfaces.asScala.toList, Nil) - } + def findMethod(c: ClassNode): Option[MethodNode] = c.methods.asScala.find(m => m.name == name && m.desc == descriptor) + + // https://docs.oracle.com/javase/specs/jvms/se8/html/jvms-2.html#jvms-2.9: "In Java SE 8, the only + // signature polymorphic methods are the invoke and invokeExact methods of the class MethodHandle. + def isSignaturePolymorphic(owner: InternalName) = owner == coreBTypes.jliMethodHandleRef.internalName && (name == "invoke" || name == "invokeExact") + + // Note: if `owner` is an interface, in the first iteration we search for a matching member in the interface itself. + // If that fails, the recursive invocation checks in the superclass (which is Object) with `publicInstanceOnly == true`. + // This is specified in jvms-5.4.3.4: interface method resolution only returns public, non-static methods of Object. + def findInSuperClasses(owner: ClassNode, publicInstanceOnly: Boolean = false): Either[ClassNotFound, Option[(MethodNode, InternalName)]] = { + findMethod(owner) match { + case Some(m) if !publicInstanceOnly || (isPublicMethod(m) && !isStaticMethod(m)) => Right(Some((m, owner.name))) + case None => + if (isSignaturePolymorphic(owner.name)) Right(Some((owner.methods.asScala.find(_.name == name).get, owner.name))) + else if (owner.superName == null) Right(None) + else classNode(owner.superName).flatMap(findInSuperClasses(_, isInterface(owner))) } } - // find the MethodNode in one of the parent classes - def findInParents(parents: List[InternalName], failedClasses: List[ClassNotFound]): Either[List[ClassNotFound], (MethodNode, InternalName)] = parents match { - case x :: xs => methodNodeImpl(x).left.flatMap(failed => findInParents(xs, failed ::: failedClasses)) - case Nil => Left(failedClasses) + def findInInterfaces(initialOwner: ClassNode): Either[ClassNotFound, Option[(MethodNode, InternalName)]] = { + val visited = mutable.Set.empty[InternalName] + val found = mutable.ListBuffer.empty[(MethodNode, ClassNode)] + + def findIn(owner: ClassNode): Option[ClassNotFound] = { + for (i <- owner.interfaces.asScala if !visited(i)) classNode(i) match { + case Left(e) => return Some(e) + case Right(c) => + visited += i + // abstract and static methods are excluded, see jvms-5.4.3.3 + for (m <- findMethod(c) if !isPrivateMethod(m) && !isStaticMethod(m)) found += ((m, c)) + val recursionResult = findIn(c) + if (recursionResult.isDefined) return recursionResult + } + None + } + + findIn(initialOwner) + + val result = + if (found.size <= 1) found.headOption + else { + val maxSpecific = found.filterNot({ + case (method, owner) => + isAbstractMethod(method) || { + val ownerTp = classBTypeFromClassNode(owner) + found exists { + case (other, otherOwner) => + (other ne method) && { + val otherTp = classBTypeFromClassNode(otherOwner) + otherTp.isSubtypeOf(ownerTp).get + } + } + } + }) + // (*) note that if there's no single, non-abstract, maximally-specific method, the jvm + // method resolution (jvms-5.4.3.3) returns any of the non-private, non-static parent + // methods at random (abstract or concrete). + // we chose not to do this here, to prevent the inliner from potentially inlining the + // wrong method. in other words, we guarantee that a concrete method is only returned if + // it resolves deterministically. + // however, there may be multiple abstract methods inherited. in this case we *do* want + // to return a result to allow performing accessibility checks in the inliner. note that + // for accessibility it does not matter which of these methods is return, as they are all + // non-private (i.e., public, protected is not possible, jvms-4.1). + // the remaining case (when there's no max-specific method, but some non-abstract one) + // does not occur in bytecode generated by scalac or javac. we return no result in this + // case. this may at worst prevent some optimizations from happening. + if (maxSpecific.size == 1) maxSpecific.headOption + else if (found.forall(p => isAbstractMethod(p._1))) found.headOption // (*) + else None + } + Right(result.map(p => (p._1, p._2.name))) } // In a MethodInsnNode, the `owner` field may be an array descriptor, for example when invoking `clone`. We don't have a method node to return in this case. - if (ownerInternalNameOrArrayDescriptor.charAt(0) == '[') - Left(MethodNotFound(name, descriptor, ownerInternalNameOrArrayDescriptor, Nil)) - else - methodNodeImpl(ownerInternalNameOrArrayDescriptor).left.map(MethodNotFound(name, descriptor, ownerInternalNameOrArrayDescriptor, _)) + if (ownerInternalNameOrArrayDescriptor.charAt(0) == '[') { + Left(MethodNotFound(name, descriptor, ownerInternalNameOrArrayDescriptor, None)) + } else { + def notFound(cnf: Option[ClassNotFound]) = Left(MethodNotFound(name, descriptor, ownerInternalNameOrArrayDescriptor, cnf)) + val res: Either[ClassNotFound, Option[(MethodNode, InternalName)]] = classNode(ownerInternalNameOrArrayDescriptor).flatMap(c => + findInSuperClasses(c) flatMap { + case None => findInInterfaces(c) + case res => Right(res) + } + ) + res match { + case Left(e) => notFound(Some(e)) + case Right(None) => notFound(None) + case Right(Some(res)) => Right(res) + } + } } private def parseClass(internalName: InternalName): Either[ClassNotFound, ClassNode] = { @@ -157,17 +290,7 @@ class ByteCodeRepository(val classPath: ClassFileLookup[AbstractFile], val isJav classNode } match { case Some(node) => Right(node) - case None => Left(ClassNotFound(internalName, isJavaSourceDefined(internalName))) + case None => Left(ClassNotFound(internalName, javaDefinedClasses(internalName))) } } } - -object ByteCodeRepository { - /** - * The source of a ClassNode in the ByteCodeRepository. Can be either [[CompilationUnit]] if the - * class is being compiled or [[Classfile]] if the class was parsed from the compilation classpath. - */ - sealed trait Source - object CompilationUnit extends Source - object Classfile extends Source -} diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/BytecodeUtils.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/BytecodeUtils.scala index 7aadd2c466..bfd92cac5c 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/BytecodeUtils.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/BytecodeUtils.scala @@ -8,28 +8,29 @@ package backend.jvm package opt import scala.annotation.{tailrec, switch} + import scala.collection.mutable import scala.reflect.internal.util.Collections._ import scala.tools.asm.commons.CodeSizeEvaluator import scala.tools.asm.tree.analysis._ -import scala.tools.asm.{MethodWriter, ClassWriter, Label, Opcodes, Type} +import scala.tools.asm.{Label, Type} +import scala.tools.asm.Opcodes._ import scala.tools.asm.tree._ import GenBCode._ -import scala.collection.convert.decorateAsScala._ -import scala.collection.convert.decorateAsJava._ -import scala.tools.nsc.backend.jvm.BTypes._ +import scala.collection.JavaConverters._ +import scala.tools.nsc.backend.jvm.analysis.InstructionStackEffect object BytecodeUtils { // http://docs.oracle.com/javase/specs/jvms/se7/html/jvms-4.html#jvms-4.9.1 - final val maxJVMMethodSize = 65535 + final val maxJVMMethodSize = 65535 // 5% margin, more than enough for the instructions added by the inliner (store / load args, null check for instance methods) final val maxMethodSizeAfterInline = maxJVMMethodSize - (maxJVMMethodSize / 20) object Goto { def unapply(instruction: AbstractInsnNode): Option[JumpInsnNode] = { - if (instruction.getOpcode == Opcodes.GOTO) Some(instruction.asInstanceOf[JumpInsnNode]) + if (instruction.getOpcode == GOTO) Some(instruction.asInstanceOf[JumpInsnNode]) else None } } @@ -49,8 +50,9 @@ object BytecodeUtils { } object VarInstruction { - def unapply(instruction: AbstractInsnNode): Option[VarInsnNode] = { - if (isVarInstruction(instruction)) Some(instruction.asInstanceOf[VarInsnNode]) + def unapply(instruction: AbstractInsnNode): Option[(AbstractInsnNode, Int)] = { + if (isLoadStoreOrRet(instruction)) Some((instruction, instruction.asInstanceOf[VarInsnNode].`var`)) + else if (instruction.getOpcode == IINC) Some((instruction, instruction.asInstanceOf[IincInsnNode].`var`)) else None } @@ -59,30 +61,46 @@ object BytecodeUtils { def isJumpNonJsr(instruction: AbstractInsnNode): Boolean = { val op = instruction.getOpcode // JSR is deprecated in classfile version 50, disallowed in 51. historically, it was used to implement finally. - op == Opcodes.GOTO || isConditionalJump(instruction) + op == GOTO || isConditionalJump(instruction) } def isConditionalJump(instruction: AbstractInsnNode): Boolean = { val op = instruction.getOpcode - (op >= Opcodes.IFEQ && op <= Opcodes.IF_ACMPNE) || op == Opcodes.IFNULL || op == Opcodes.IFNONNULL + (op >= IFEQ && op <= IF_ACMPNE) || op == IFNULL || op == IFNONNULL } def isReturn(instruction: AbstractInsnNode): Boolean = { val op = instruction.getOpcode - op >= Opcodes.IRETURN && op <= Opcodes.RETURN + op >= IRETURN && op <= RETURN } def isLoad(instruction: AbstractInsnNode): Boolean = { val op = instruction.getOpcode - op >= Opcodes.ILOAD && op <= Opcodes.ALOAD + op >= ILOAD && op <= ALOAD } def isStore(instruction: AbstractInsnNode): Boolean = { val op = instruction.getOpcode - op >= Opcodes.ISTORE && op <= Opcodes.ASTORE + op >= ISTORE && op <= ASTORE + } + + def isLoadStoreOrRet(instruction: AbstractInsnNode): Boolean = isLoad(instruction) || isStore(instruction) || instruction.getOpcode == RET + + def isLoadOrStore(instruction: AbstractInsnNode): Boolean = isLoad(instruction) || isStore(instruction) + + def isNonVirtualCall(instruction: AbstractInsnNode): Boolean = { + val op = instruction.getOpcode + op == INVOKESPECIAL || op == INVOKESTATIC } - def isVarInstruction(instruction: AbstractInsnNode): Boolean = isLoad(instruction) || isStore(instruction) + def isVirtualCall(instruction: AbstractInsnNode): Boolean = { + val op = instruction.getOpcode + op == INVOKEVIRTUAL || op == INVOKEINTERFACE + } + + def isCall(instruction: AbstractInsnNode): Boolean = { + isNonVirtualCall(instruction) || isVirtualCall(instruction) + } def isExecutable(instruction: AbstractInsnNode): Boolean = instruction.getOpcode >= 0 @@ -90,27 +108,40 @@ object BytecodeUtils { methodNode.name == INSTANCE_CONSTRUCTOR_NAME || methodNode.name == CLASS_CONSTRUCTOR_NAME } - def isStaticMethod(methodNode: MethodNode): Boolean = (methodNode.access & Opcodes.ACC_STATIC) != 0 + def isPublicMethod(methodNode: MethodNode): Boolean = (methodNode.access & ACC_PUBLIC) != 0 - def isAbstractMethod(methodNode: MethodNode): Boolean = (methodNode.access & Opcodes.ACC_ABSTRACT) != 0 + def isPrivateMethod(methodNode: MethodNode): Boolean = (methodNode.access & ACC_PRIVATE) != 0 - def isSynchronizedMethod(methodNode: MethodNode): Boolean = (methodNode.access & Opcodes.ACC_SYNCHRONIZED) != 0 + def isStaticMethod(methodNode: MethodNode): Boolean = (methodNode.access & ACC_STATIC) != 0 - def isNativeMethod(methodNode: MethodNode): Boolean = (methodNode.access & Opcodes.ACC_NATIVE) != 0 + def isAbstractMethod(methodNode: MethodNode): Boolean = (methodNode.access & ACC_ABSTRACT) != 0 - def isFinalClass(classNode: ClassNode): Boolean = (classNode.access & Opcodes.ACC_FINAL) != 0 + def isSynchronizedMethod(methodNode: MethodNode): Boolean = (methodNode.access & ACC_SYNCHRONIZED) != 0 - def isFinalMethod(methodNode: MethodNode): Boolean = (methodNode.access & (Opcodes.ACC_FINAL | Opcodes.ACC_PRIVATE | Opcodes.ACC_STATIC)) != 0 + def isNativeMethod(methodNode: MethodNode): Boolean = (methodNode.access & ACC_NATIVE) != 0 - def isStrictfpMethod(methodNode: MethodNode): Boolean = (methodNode.access & Opcodes.ACC_STRICT) != 0 + def hasCallerSensitiveAnnotation(methodNode: MethodNode): Boolean = methodNode.visibleAnnotations != null && methodNode.visibleAnnotations.asScala.exists(_.desc == "Lsun/reflect/CallerSensitive;") + + def isFinalClass(classNode: ClassNode): Boolean = (classNode.access & ACC_FINAL) != 0 + + def isInterface(classNode: ClassNode): Boolean = (classNode.access & ACC_INTERFACE) != 0 + + def isFinalMethod(methodNode: MethodNode): Boolean = (methodNode.access & (ACC_FINAL | ACC_PRIVATE | ACC_STATIC)) != 0 + + def isStrictfpMethod(methodNode: MethodNode): Boolean = (methodNode.access & ACC_STRICT) != 0 def isReference(t: Type) = t.getSort == Type.OBJECT || t.getSort == Type.ARRAY - def nextExecutableInstruction(instruction: AbstractInsnNode, alsoKeep: AbstractInsnNode => Boolean = Set()): Option[AbstractInsnNode] = { - var result = instruction - do { result = result.getNext } - while (result != null && !isExecutable(result) && !alsoKeep(result)) - Option(result) + @tailrec def nextExecutableInstruction(insn: AbstractInsnNode, alsoKeep: AbstractInsnNode => Boolean = Set()): Option[AbstractInsnNode] = { + val next = insn.getNext + if (next == null || isExecutable(next) || alsoKeep(next)) Option(next) + else nextExecutableInstruction(next, alsoKeep) + } + + @tailrec def nextExecutableInstructionOrLabel(insn: AbstractInsnNode): Option[AbstractInsnNode] = { + val next = insn.getNext + if (next == null || isExecutable(next) || next.isInstanceOf[LabelNode]) Option(next) + else nextExecutableInstructionOrLabel(next) } def sameTargetExecutableInstruction(a: JumpInsnNode, b: JumpInsnNode): Boolean = { @@ -124,14 +155,14 @@ object BytecodeUtils { def removeJumpAndAdjustStack(method: MethodNode, jump: JumpInsnNode) { val instructions = method.instructions val op = jump.getOpcode - if ((op >= Opcodes.IFEQ && op <= Opcodes.IFGE) || op == Opcodes.IFNULL || op == Opcodes.IFNONNULL) { + if ((op >= IFEQ && op <= IFLE) || op == IFNULL || op == IFNONNULL) { instructions.insert(jump, getPop(1)) - } else if ((op >= Opcodes.IF_ICMPEQ && op <= Opcodes.IF_ICMPLE) || op == Opcodes.IF_ACMPEQ || op == Opcodes.IF_ACMPNE) { + } else if ((op >= IF_ICMPEQ && op <= IF_ICMPLE) || op == IF_ACMPEQ || op == IF_ACMPNE) { instructions.insert(jump, getPop(1)) instructions.insert(jump, getPop(1)) } else { // we can't remove JSR: its execution does not only jump, it also adds a return address to the stack - assert(jump.getOpcode == Opcodes.GOTO) + assert(jump.getOpcode == GOTO) } instructions.remove(jump) } @@ -148,37 +179,61 @@ object BytecodeUtils { } def negateJumpOpcode(jumpOpcode: Int): Int = (jumpOpcode: @switch) match { - case Opcodes.IFEQ => Opcodes.IFNE - case Opcodes.IFNE => Opcodes.IFEQ + case IFEQ => IFNE + case IFNE => IFEQ + + case IFLT => IFGE + case IFGE => IFLT - case Opcodes.IFLT => Opcodes.IFGE - case Opcodes.IFGE => Opcodes.IFLT + case IFGT => IFLE + case IFLE => IFGT - case Opcodes.IFGT => Opcodes.IFLE - case Opcodes.IFLE => Opcodes.IFGT + case IF_ICMPEQ => IF_ICMPNE + case IF_ICMPNE => IF_ICMPEQ - case Opcodes.IF_ICMPEQ => Opcodes.IF_ICMPNE - case Opcodes.IF_ICMPNE => Opcodes.IF_ICMPEQ + case IF_ICMPLT => IF_ICMPGE + case IF_ICMPGE => IF_ICMPLT - case Opcodes.IF_ICMPLT => Opcodes.IF_ICMPGE - case Opcodes.IF_ICMPGE => Opcodes.IF_ICMPLT + case IF_ICMPGT => IF_ICMPLE + case IF_ICMPLE => IF_ICMPGT - case Opcodes.IF_ICMPGT => Opcodes.IF_ICMPLE - case Opcodes.IF_ICMPLE => Opcodes.IF_ICMPGT + case IF_ACMPEQ => IF_ACMPNE + case IF_ACMPNE => IF_ACMPEQ - case Opcodes.IF_ACMPEQ => Opcodes.IF_ACMPNE - case Opcodes.IF_ACMPNE => Opcodes.IF_ACMPEQ + case IFNULL => IFNONNULL + case IFNONNULL => IFNULL + } - case Opcodes.IFNULL => Opcodes.IFNONNULL - case Opcodes.IFNONNULL => Opcodes.IFNULL + def isSize2LoadOrStore(opcode: Int): Boolean = (opcode: @switch) match { + case LLOAD | DLOAD | LSTORE | DSTORE => true + case _ => false } def getPop(size: Int): InsnNode = { - val op = if (size == 1) Opcodes.POP else Opcodes.POP2 + val op = if (size == 1) POP else POP2 new InsnNode(op) } - def instructionResultSize(instruction: AbstractInsnNode) = InstructionResultSize(instruction) + def instructionResultSize(insn: AbstractInsnNode) = InstructionStackEffect.prod(InstructionStackEffect.forClassfile(insn)) + + def loadZeroForTypeSort(sort: Int) = (sort: @switch) match { + case Type.BOOLEAN | + Type.BYTE | + Type.CHAR | + Type.SHORT | + Type.INT => new InsnNode(ICONST_0) + case Type.LONG => new InsnNode(LCONST_0) + case Type.FLOAT => new InsnNode(FCONST_0) + case Type.DOUBLE => new InsnNode(DCONST_0) + case Type.OBJECT => new InsnNode(ACONST_NULL) + } + + /** + * The number of local variable slots used for parameters and for the `this` reference. + */ + def parametersSize(methodNode: MethodNode): Int = { + (Type.getArgumentsAndReturnSizes(methodNode.desc) >> 2) - (if (isStaticMethod(methodNode)) 1 else 0) + } def labelReferences(method: MethodNode): Map[LabelNode, Set[AnyRef]] = { val res = mutable.Map.empty[LabelNode, Set[AnyRef]] @@ -222,29 +277,6 @@ object BytecodeUtils { } } - /** - * In order to run an Analyzer, the maxLocals / maxStack fields need to be available. The ASM - * framework only computes these values during bytecode generation. - * - * Since there's currently no better way, we run a bytecode generator on the method and extract - * the computed values. This required changes to the ASM codebase: - * - the [[MethodWriter]] class was made public - * - accessors for maxLocals / maxStack were added to the MethodWriter class - * - * We could probably make this faster (and allocate less memory) by hacking the ASM framework - * more: create a subclass of MethodWriter with a /dev/null byteVector. Another option would be - * to create a separate visitor for computing those values, duplicating the functionality from the - * MethodWriter. - */ - def computeMaxLocalsMaxStack(method: MethodNode): Unit = { - val cw = new ClassWriter(ClassWriter.COMPUTE_MAXS) - val excs = method.exceptions.asScala.toArray - val mw = cw.visitMethod(method.access, method.name, method.desc, method.signature, excs).asInstanceOf[MethodWriter] - method.accept(mw) - method.maxLocals = mw.getMaxLocals - method.maxStack = mw.getMaxStack - } - def codeSizeOKForInlining(caller: MethodNode, callee: MethodNode): Boolean = { // Looking at the implementation of CodeSizeEvaluator, all instructions except tableswitch and // lookupswitch are <= 8 bytes. These should be rare enough for 8 to be an OK rough upper bound. @@ -289,34 +321,36 @@ object BytecodeUtils { } /** - * Clone the instructions in `methodNode` into a new [[InsnList]], mapping labels according to - * the `labelMap`. Returns the new instruction list and a map from old to new instructions. - */ - def cloneInstructions(methodNode: MethodNode, labelMap: Map[LabelNode, LabelNode]): (InsnList, Map[AbstractInsnNode, AbstractInsnNode]) = { - val javaLabelMap = labelMap.asJava - val result = new InsnList - var map = Map.empty[AbstractInsnNode, AbstractInsnNode] - for (ins <- methodNode.instructions.iterator.asScala) { - val cloned = ins.clone(javaLabelMap) - result add cloned - map += ((ins, cloned)) - } - (result, map) - } - - /** * Clone the local variable descriptors of `methodNode` and map their `start` and `end` labels * according to the `labelMap`. */ - def cloneLocalVariableNodes(methodNode: MethodNode, labelMap: Map[LabelNode, LabelNode], prefix: String): List[LocalVariableNode] = { - methodNode.localVariables.iterator().asScala.map(localVariable => new LocalVariableNode( - prefix + localVariable.name, - localVariable.desc, - localVariable.signature, - labelMap(localVariable.start), - labelMap(localVariable.end), - localVariable.index - )).toList + def cloneLocalVariableNodes(methodNode: MethodNode, labelMap: Map[LabelNode, LabelNode], calleeMethodName: String, shift: Int): List[LocalVariableNode] = { + methodNode.localVariables.iterator().asScala.map(localVariable => { + val name = + if (calleeMethodName.length + localVariable.name.length < BTypes.InlinedLocalVariablePrefixMaxLenght) { + calleeMethodName + "_" + localVariable.name + } else { + val parts = localVariable.name.split("_").toVector + val (methNames, varName) = (calleeMethodName +: parts.init, parts.last) + // keep at least 5 characters per method name + val maxNumMethNames = BTypes.InlinedLocalVariablePrefixMaxLenght / 5 + val usedMethNames = + if (methNames.length < maxNumMethNames) methNames + else { + val half = maxNumMethNames / 2 + methNames.take(half) ++ methNames.takeRight(half) + } + val charsPerMethod = BTypes.InlinedLocalVariablePrefixMaxLenght / usedMethNames.length + usedMethNames.foldLeft("")((res, methName) => res + methName.take(charsPerMethod) + "_") + varName + } + new LocalVariableNode( + name, + localVariable.desc, + localVariable.signature, + labelMap(localVariable.start), + labelMap(localVariable.end), + localVariable.index + shift) + }).toList } /** @@ -344,23 +378,14 @@ object BytecodeUtils { * method which explains the issue with such phantom values. */ def fixLoadedNothingOrNullValue(loadedType: Type, loadInstr: AbstractInsnNode, methodNode: MethodNode, bTypes: BTypes): Unit = { - if (loadedType == bTypes.coreBTypes.RT_NOTHING.toASMType) { - methodNode.instructions.insert(loadInstr, new InsnNode(Opcodes.ATHROW)) - } else if (loadedType == bTypes.coreBTypes.RT_NULL.toASMType) { - methodNode.instructions.insert(loadInstr, new InsnNode(Opcodes.ACONST_NULL)) - methodNode.instructions.insert(loadInstr, new InsnNode(Opcodes.POP)) + if (loadedType == bTypes.coreBTypes.srNothingRef.toASMType) { + methodNode.instructions.insert(loadInstr, new InsnNode(ATHROW)) + } else if (loadedType == bTypes.coreBTypes.srNullRef.toASMType) { + methodNode.instructions.insert(loadInstr, new InsnNode(ACONST_NULL)) + methodNode.instructions.insert(loadInstr, new InsnNode(POP)) } } - /** - * A wrapper to make ASM's Analyzer a bit easier to use. - */ - class AsmAnalyzer[V <: Value](methodNode: MethodNode, classInternalName: InternalName, interpreter: Interpreter[V] = new BasicInterpreter) { - val analyzer = new Analyzer(interpreter) - analyzer.analyze(classInternalName, methodNode) - def frameAt(instruction: AbstractInsnNode): Frame[V] = analyzer.frameAt(instruction, methodNode) - } - implicit class AnalyzerExtensions[V <: Value](val analyzer: Analyzer[V]) extends AnyVal { def frameAt(instruction: AbstractInsnNode, methodNode: MethodNode): Frame[V] = analyzer.getFrames()(methodNode.instructions.indexOf(instruction)) } diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala index 96455c0e38..a740ca525c 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala @@ -7,182 +7,320 @@ package scala.tools.nsc package backend.jvm package opt +import scala.collection.immutable.IntMap import scala.reflect.internal.util.{NoPosition, Position} -import scala.tools.asm.tree.analysis.{Value, Analyzer, BasicInterpreter} -import scala.tools.asm.{Opcodes, Type, Handle} +import scala.tools.asm.{Handle, Opcodes, Type} import scala.tools.asm.tree._ -import scala.collection.concurrent -import scala.collection.convert.decorateAsScala._ -import scala.tools.nsc.backend.jvm.BTypes.InternalName +import scala.collection.{concurrent, mutable} +import scala.collection.JavaConverters._ +import scala.tools.nsc.backend.jvm.BTypes.{InternalName, MethodInlineInfo} import scala.tools.nsc.backend.jvm.BackendReporting._ -import scala.tools.nsc.backend.jvm.analysis.{NotNull, NullnessAnalyzer} -import ByteCodeRepository.{Source, CompilationUnit} +import scala.tools.nsc.backend.jvm.analysis._ import BytecodeUtils._ class CallGraph[BT <: BTypes](val btypes: BT) { import btypes._ + import backendUtils._ - val callsites: concurrent.Map[MethodInsnNode, Callsite] = recordPerRunCache(concurrent.TrieMap.empty) + /** + * The call graph contains the callsites in the program being compiled. + * + * Indexing the call graph by the containing MethodNode and the invocation MethodInsnNode allows + * finding callsites efficiently. For example, an inlining heuristic might want to know all + * callsites within a callee method. + * + * Note that the call graph is not guaranteed to be complete: callsites may be missing. In + * particular, if a method is very large, all of its callsites might not be in the hash map. + * The reason is that adding a method to the call graph requires running an ASM analyzer, which + * can be too slow. + * + * Note that call graph entries (Callsite instances) keep a reference to the invocation + * MethodInsnNode, which keeps all AbstractInsnNodes of the method reachable. Adding classes + * from the classpath to the call graph (in addition to classes being compiled) may prevent + * method instruction nodes from being GCd. The ByteCodeRepository has a fixed size cache for + * parsed ClassNodes - keeping all ClassNodes alive consumed too much memory. + * The call graph is less problematic because only methods being called are kept alive, not entire + * classes. But we should keep an eye on this. + */ + val callsites: mutable.Map[MethodNode, Map[MethodInsnNode, Callsite]] = recordPerRunCache(concurrent.TrieMap.empty withDefaultValue Map.empty) + + /** + * Closure instantiations in the program being compiled. + * + * Indexing closure instantiations by the containing MethodNode is beneficial for the closure + * optimizer: finding callsites to re-write requires running a producers-consumers analysis on + * the method. Here the closure instantiations are already grouped by method. + */ + val closureInstantiations: mutable.Map[MethodNode, Map[InvokeDynamicInsnNode, ClosureInstantiation]] = recordPerRunCache(concurrent.TrieMap.empty withDefaultValue Map.empty) + + def removeCallsite(invocation: MethodInsnNode, methodNode: MethodNode): Option[Callsite] = { + val methodCallsites = callsites(methodNode) + val newCallsites = methodCallsites - invocation + if (newCallsites.isEmpty) callsites.remove(methodNode) + else callsites(methodNode) = newCallsites + methodCallsites.get(invocation) + } + + def addCallsite(callsite: Callsite): Unit = { + val methodCallsites = callsites(callsite.callsiteMethod) + callsites(callsite.callsiteMethod) = methodCallsites + (callsite.callsiteInstruction -> callsite) + } - val closureInstantiations: concurrent.Map[InvokeDynamicInsnNode, ClosureInstantiation] = recordPerRunCache(concurrent.TrieMap.empty) + def containsCallsite(callsite: Callsite): Boolean = callsites(callsite.callsiteMethod) contains callsite.callsiteInstruction + def findCallSite(method: MethodNode, call: MethodInsnNode): Option[Callsite] = callsites.getOrElse(method, Map.empty).get(call) + + def removeClosureInstantiation(indy: InvokeDynamicInsnNode, methodNode: MethodNode): Option[ClosureInstantiation] = { + val methodClosureInits = closureInstantiations(methodNode) + val newClosureInits = methodClosureInits - indy + if (newClosureInits.isEmpty) closureInstantiations.remove(methodNode) + else closureInstantiations(methodNode) = newClosureInits + methodClosureInits.get(indy) + } + + def addClosureInstantiation(closureInit: ClosureInstantiation) = { + val methodClosureInits = closureInstantiations(closureInit.ownerMethod) + closureInstantiations(closureInit.ownerMethod) = methodClosureInits + (closureInit.lambdaMetaFactoryCall.indy -> closureInit) + } def addClass(classNode: ClassNode): Unit = { val classType = classBTypeFromClassNode(classNode) - for { - m <- classNode.methods.asScala - (calls, closureInits) = analyzeCallsites(m, classType) - } { - calls foreach (callsite => callsites(callsite.callsiteInstruction) = callsite) - closureInits foreach (lmf => closureInstantiations(lmf.indy) = ClosureInstantiation(lmf, m, classType)) - } + classNode.methods.asScala.foreach(addMethod(_, classType)) } - /** - * Returns a list of callsites in the method, plus a list of closure instantiation indy instructions. - */ - def analyzeCallsites(methodNode: MethodNode, definingClass: ClassBType): (List[Callsite], List[LambdaMetaFactoryCall]) = { + def addIfMissing(methodNode: MethodNode, definingClass: ClassBType): Unit = { + if (!callsites.contains(methodNode)) addMethod(methodNode, definingClass) + } - case class CallsiteInfo(safeToInline: Boolean, safeToRewrite: Boolean, - annotatedInline: Boolean, annotatedNoInline: Boolean, - warning: Option[CalleeInfoWarning]) + def addMethod(methodNode: MethodNode, definingClass: ClassBType): Unit = { + if (!BytecodeUtils.isAbstractMethod(methodNode) && !BytecodeUtils.isNativeMethod(methodNode)) { + // TODO: run dataflow analyses to make the call graph more precise + // - producers to get forwarded parameters (ForwardedParam) + // - typeAnalysis for more precise argument types, more precise callee + + // For now we run a NullnessAnalyzer. It is used to determine if the receiver of an instance + // call is known to be not-null, in which case we don't have to emit a null check when inlining. + // It is also used to get the stack height at the call site. + + val analyzer = { + if (compilerSettings.optNullnessTracking && AsmAnalyzer.sizeOKForNullness(methodNode)) { + Some(new AsmAnalyzer(methodNode, definingClass.internalName, new NullnessAnalyzer(btypes, methodNode))) + } else if (AsmAnalyzer.sizeOKForBasicValue(methodNode)) { + Some(new AsmAnalyzer(methodNode, definingClass.internalName)) + } else None + } - /** - * Analyze a callsite and gather meta-data that can be used for inlining decisions. - */ - def analyzeCallsite(calleeMethodNode: MethodNode, calleeDeclarationClassBType: ClassBType, receiverTypeInternalName: InternalName, calleeSource: Source): CallsiteInfo = { - val methodSignature = calleeMethodNode.name + calleeMethodNode.desc + // if the method is too large to run an analyzer, it is not added to the call graph + if (analyzer.nonEmpty) { + val Some(a) = analyzer + def receiverNotNullByAnalysis(call: MethodInsnNode, numArgs: Int) = a.analyzer match { + case nullnessAnalyzer: NullnessAnalyzer => + val frame = nullnessAnalyzer.frameAt(call, methodNode) + frame.getStack(frame.getStackSize - 1 - numArgs) eq NotNullValue + case _ => false + } - try { - // The inlineInfo.methodInfos of a ClassBType holds an InlineInfo for each method *declared* - // within a class (not for inherited methods). Since we already have the classBType of the - // callee, we only check there for the methodInlineInfo, we should find it there. - calleeDeclarationClassBType.info.orThrow.inlineInfo.methodInfos.get(methodSignature) match { - case Some(methodInlineInfo) => - val canInlineFromSource = compilerSettings.YoptInlineGlobal || calleeSource == CompilationUnit + var methodCallsites = Map.empty[MethodInsnNode, Callsite] + var methodClosureInstantiations = Map.empty[InvokeDynamicInsnNode, ClosureInstantiation] + + // lazy so it is only computed if actually used by computeArgInfos + lazy val prodCons = new ProdConsAnalyzer(methodNode, definingClass.internalName) + + methodNode.instructions.iterator.asScala foreach { + case call: MethodInsnNode if a.frameAt(call) != null => // skips over unreachable code + val callee: Either[OptimizerWarning, Callee] = for { + (method, declarationClass) <- byteCodeRepository.methodNode(call.owner, call.name, call.desc): Either[OptimizerWarning, (MethodNode, InternalName)] + (declarationClassNode, calleeSourceFilePath) <- byteCodeRepository.classNodeAndSourceFilePath(declarationClass): Either[OptimizerWarning, (ClassNode, Option[String])] + } yield { + val declarationClassBType = classBTypeFromClassNode(declarationClassNode) + val info = analyzeCallsite(method, declarationClassBType, call, calleeSourceFilePath) + import info._ + Callee( + callee = method, + calleeDeclarationClass = declarationClassBType, + isStaticallyResolved = isStaticallyResolved, + sourceFilePath = sourceFilePath, + annotatedInline = annotatedInline, + annotatedNoInline = annotatedNoInline, + samParamTypes = info.samParamTypes, + calleeInfoWarning = warning) + } - val isAbstract = BytecodeUtils.isAbstractMethod(calleeMethodNode) + val argInfos = computeArgInfos(callee, call, prodCons) - // (1) A non-final method can be safe to inline if the receiver type is a final subclass. Example: - // class A { @inline def f = 1 }; object B extends A; B.f // can be inlined - // - // TODO: type analysis can render more calls statically resolved. Example: - // new A.f // can be inlined, the receiver type is known to be exactly A. - val isStaticallyResolved: Boolean = { - methodInlineInfo.effectivelyFinal || - classBTypeFromParsedClassfile(receiverTypeInternalName).info.orThrow.inlineInfo.isEffectivelyFinal // (1) + val receiverNotNull = call.getOpcode == Opcodes.INVOKESTATIC || { + val numArgs = Type.getArgumentTypes(call.desc).length + receiverNotNullByAnalysis(call, numArgs) } - val isRewritableTraitCall = isStaticallyResolved && methodInlineInfo.traitMethodWithStaticImplementation - - val warning = calleeDeclarationClassBType.info.orThrow.inlineInfo.warning.map( - MethodInlineInfoIncomplete(calleeDeclarationClassBType.internalName, calleeMethodNode.name, calleeMethodNode.desc, _)) - - // (1) For invocations of final trait methods, the callee isStaticallyResolved but also - // abstract. Such a callee is not safe to inline - it needs to be re-written to the - // static impl method first (safeToRewrite). - // (2) Final trait methods can be rewritten from the interface to the static implementation - // method to enable inlining. - CallsiteInfo( - safeToInline = - canInlineFromSource && - isStaticallyResolved && // (1) - !isAbstract && - !BytecodeUtils.isConstructor(calleeMethodNode) && - !BytecodeUtils.isNativeMethod(calleeMethodNode), - safeToRewrite = canInlineFromSource && isRewritableTraitCall, // (2) - annotatedInline = methodInlineInfo.annotatedInline, - annotatedNoInline = methodInlineInfo.annotatedNoInline, - warning = warning) - - case None => - val warning = MethodInlineInfoMissing(calleeDeclarationClassBType.internalName, calleeMethodNode.name, calleeMethodNode.desc, calleeDeclarationClassBType.info.orThrow.inlineInfo.warning) - CallsiteInfo(false, false, false, false, Some(warning)) + methodCallsites += call -> Callsite( + callsiteInstruction = call, + callsiteMethod = methodNode, + callsiteClass = definingClass, + callee = callee, + argInfos = argInfos, + callsiteStackHeight = a.frameAt(call).getStackSize, + receiverKnownNotNull = receiverNotNull, + callsitePosition = callsitePositions.getOrElse(call, NoPosition), + annotatedInline = inlineAnnotatedCallsites(call), + annotatedNoInline = noInlineAnnotatedCallsites(call) + ) + + case LambdaMetaFactoryCall(indy, samMethodType, implMethod, instantiatedMethodType) if a.frameAt(indy) != null => + val lmf = LambdaMetaFactoryCall(indy, samMethodType, implMethod, instantiatedMethodType) + val capturedArgInfos = computeCapturedArgInfos(lmf, prodCons) + methodClosureInstantiations += indy -> ClosureInstantiation( + lmf, + methodNode, + definingClass, + capturedArgInfos) + + case _ => } - } catch { - case Invalid(noInfo: NoClassBTypeInfo) => - val warning = MethodInlineInfoError(calleeDeclarationClassBType.internalName, calleeMethodNode.name, calleeMethodNode.desc, noInfo) - CallsiteInfo(false, false, false, false, Some(warning)) + + callsites(methodNode) = methodCallsites + closureInstantiations(methodNode) = methodClosureInstantiations } } + } - // TODO: run dataflow analyses to make the call graph more precise - // - producers to get forwarded parameters (ForwardedParam) - // - typeAnalysis for more precise argument types, more precise callee - - // For now we run a NullnessAnalyzer. It is used to determine if the receiver of an instance - // call is known to be not-null, in which case we don't have to emit a null check when inlining. - // It is also used to get the stack height at the call site. - localOpt.minimalRemoveUnreachableCode(methodNode, definingClass.internalName) - - val analyzer: Analyzer[_ <: Value] = { - if (compilerSettings.YoptNullnessTracking) new NullnessAnalyzer - else new Analyzer(new BasicInterpreter) + def computeArgInfos(callee: Either[OptimizerWarning, Callee], callsiteInsn: MethodInsnNode, prodCons: => ProdConsAnalyzer): IntMap[ArgInfo] = { + if (callee.isLeft) IntMap.empty + else { + lazy val numArgs = Type.getArgumentTypes(callsiteInsn.desc).length + (if (callsiteInsn.getOpcode == Opcodes.INVOKESTATIC) 0 else 1) + argInfosForSams(callee.get.samParamTypes, callsiteInsn, numArgs, prodCons) } - analyzer.analyze(definingClass.internalName, methodNode) + } - def receiverNotNullByAnalysis(call: MethodInsnNode, numArgs: Int) = analyzer match { - case nullnessAnalyzer: NullnessAnalyzer => - val frame = nullnessAnalyzer.frameAt(call, methodNode) - frame.getStack(frame.getStackSize - 1 - numArgs).nullness == NotNull + def computeCapturedArgInfos(lmf: LambdaMetaFactoryCall, prodCons: => ProdConsAnalyzer): IntMap[ArgInfo] = { + val capturedSams = capturedSamTypes(lmf) + val numCaptures = Type.getArgumentTypes(lmf.indy.desc).length + argInfosForSams(capturedSams, lmf.indy, numCaptures, prodCons) + } - case _ => false + private def argInfosForSams(sams: IntMap[ClassBType], consumerInsn: AbstractInsnNode, numConsumed: => Int, prodCons: => ProdConsAnalyzer): IntMap[ArgInfo] = { + // TODO: use type analysis instead of ProdCons - should be more efficient + // some random thoughts: + // - assign special types to parameters and indy-lambda-functions to track them + // - upcast should not change type flow analysis: don't lose information. + // - can we do something about factory calls? Foo(x) for case class foo gives a Foo. + // inline the factory? analysis across method boundary? + + // assign to a lazy val to prevent repeated evaluation of the by-name arg + lazy val prodConsI = prodCons + lazy val firstConsumedSlot = { + val consumerFrame = prodConsI.frameAt(consumerInsn) + consumerFrame.stackTop - numConsumed + 1 } - - val callsites = new collection.mutable.ListBuffer[Callsite] - val closureInstantiations = new collection.mutable.ListBuffer[LambdaMetaFactoryCall] - - methodNode.instructions.iterator.asScala foreach { - case call: MethodInsnNode => - val callee: Either[OptimizerWarning, Callee] = for { - (method, declarationClass) <- byteCodeRepository.methodNode(call.owner, call.name, call.desc): Either[OptimizerWarning, (MethodNode, InternalName)] - (declarationClassNode, source) <- byteCodeRepository.classNodeAndSource(declarationClass): Either[OptimizerWarning, (ClassNode, Source)] - declarationClassBType = classBTypeFromClassNode(declarationClassNode) - } yield { - val CallsiteInfo(safeToInline, safeToRewrite, annotatedInline, annotatedNoInline, warning) = analyzeCallsite(method, declarationClassBType, call.owner, source) - Callee( - callee = method, - calleeDeclarationClass = declarationClassBType, - safeToInline = safeToInline, - safeToRewrite = safeToRewrite, - annotatedInline = annotatedInline, - annotatedNoInline = annotatedNoInline, - calleeInfoWarning = warning) + sams flatMap { + case (index, _) => + val prods = prodConsI.initialProducersForValueAt(consumerInsn, firstConsumedSlot + index) + if (prods.size != 1) None + else { + val argInfo = prods.head match { + case LambdaMetaFactoryCall(_, _, _, _) => Some(FunctionLiteral) + case ParameterProducer(local) => Some(ForwardedParam(local)) + case _ => None + } + argInfo.map((index, _)) } + } + } - val argInfos = if (callee.isLeft) Nil else { - // TODO: for now it's Nil, because we don't run any data flow analysis - // there's no point in using the parameter types, that doesn't add any information. - // NOTE: need to run the same analyses after inlining, to re-compute the argInfos for the - // new duplicated callsites, see Inliner.inline - Nil - } + def samParamTypes(methodNode: MethodNode, receiverType: ClassBType): IntMap[ClassBType] = { + val paramTypes = { + val params = Type.getMethodType(methodNode.desc).getArgumentTypes.map(t => bTypeForDescriptorOrInternalNameFromClassfile(t.getDescriptor)) + val isStatic = BytecodeUtils.isStaticMethod(methodNode) + if (isStatic) params else receiverType +: params + } + samTypes(paramTypes) + } - val receiverNotNull = call.getOpcode == Opcodes.INVOKESTATIC || { - val numArgs = Type.getArgumentTypes(call.desc).length - receiverNotNullByAnalysis(call, numArgs) - } + def capturedSamTypes(lmf: LambdaMetaFactoryCall): IntMap[ClassBType] = { + val capturedTypes = Type.getArgumentTypes(lmf.indy.desc).map(t => bTypeForDescriptorOrInternalNameFromClassfile(t.getDescriptor)) + samTypes(capturedTypes) + } - callsites += Callsite( - callsiteInstruction = call, - callsiteMethod = methodNode, - callsiteClass = definingClass, - callee = callee, - argInfos = argInfos, - callsiteStackHeight = analyzer.frameAt(call, methodNode).getStackSize, - receiverKnownNotNull = receiverNotNull, - callsitePosition = callsitePositions.getOrElse(call, NoPosition) - ) - - case LambdaMetaFactoryCall(indy, samMethodType, implMethod, instantiatedMethodType) => - closureInstantiations += LambdaMetaFactoryCall(indy, samMethodType, implMethod, instantiatedMethodType) - - case _ => - } + private def samTypes(types: Array[BType]): IntMap[ClassBType] = { + var res = IntMap.empty[ClassBType] + for (i <- types.indices) { + types(i) match { + case c: ClassBType => + if (c.info.get.inlineInfo.sam.isDefined) res = res.updated(i, c) - (callsites.toList, closureInstantiations.toList) + case _ => + } + } + res } /** + * Just a named tuple used as return type of `analyzeCallsite`. + */ + private case class CallsiteInfo(isStaticallyResolved: Boolean, sourceFilePath: Option[String], + annotatedInline: Boolean, annotatedNoInline: Boolean, + samParamTypes: IntMap[ClassBType], + warning: Option[CalleeInfoWarning]) + + /** + * Analyze a callsite and gather meta-data that can be used for inlining decisions. + */ + private def analyzeCallsite(calleeMethodNode: MethodNode, calleeDeclarationClassBType: ClassBType, call: MethodInsnNode, calleeSourceFilePath: Option[String]): CallsiteInfo = { + val methodSignature = calleeMethodNode.name + calleeMethodNode.desc + + try { + // The inlineInfo.methodInfos of a ClassBType holds an InlineInfo for each method *declared* + // within a class (not for inherited methods). Since we already have the classBType of the + // callee, we only check there for the methodInlineInfo, we should find it there. + calleeDeclarationClassBType.info.orThrow.inlineInfo.methodInfos.get(methodSignature) match { + case Some(methodInlineInfo) => + val isAbstract = BytecodeUtils.isAbstractMethod(calleeMethodNode) + + val receiverType = classBTypeFromParsedClassfile(call.owner) + // (1) A non-final method can be safe to inline if the receiver type is a final subclass. Example: + // class A { @inline def f = 1 }; object B extends A; B.f // can be inlined + // + // TODO: (1) doesn't cover the following example: + // trait TravLike { def map = ... } + // sealed trait List extends TravLike { ... } // assume map is not overridden + // final case class :: / final case object Nil + // (l: List).map // can be inlined + // we need to know that + // - the receiver is sealed + // - what are the children of the receiver + // - all children are final + // - none of the children overrides map + // + // TODO: type analysis can render more calls statically resolved. Example: + // new A.f // can be inlined, the receiver type is known to be exactly A. + val isStaticallyResolved: Boolean = { + isNonVirtualCall(call) || // SD-86: super calls (invokespecial) can be inlined -- TODO: check if that's still needed, and if it's correct: scala-dev#143 + methodInlineInfo.effectivelyFinal || + receiverType.info.orThrow.inlineInfo.isEffectivelyFinal // (1) + } + + val warning = calleeDeclarationClassBType.info.orThrow.inlineInfo.warning.map( + MethodInlineInfoIncomplete(calleeDeclarationClassBType.internalName, calleeMethodNode.name, calleeMethodNode.desc, _)) + + CallsiteInfo( + isStaticallyResolved = isStaticallyResolved, + sourceFilePath = calleeSourceFilePath, + annotatedInline = methodInlineInfo.annotatedInline, + annotatedNoInline = methodInlineInfo.annotatedNoInline, + samParamTypes = samParamTypes(calleeMethodNode, receiverType), + warning = warning) + + case None => + val warning = MethodInlineInfoMissing(calleeDeclarationClassBType.internalName, calleeMethodNode.name, calleeMethodNode.desc, calleeDeclarationClassBType.info.orThrow.inlineInfo.warning) + CallsiteInfo(false, None, false, false, IntMap.empty, Some(warning)) + } + } catch { + case Invalid(noInfo: NoClassBTypeInfo) => + val warning = MethodInlineInfoError(calleeDeclarationClassBType.internalName, calleeMethodNode.name, calleeMethodNode.desc, noInfo) + CallsiteInfo(false, None, false, false, IntMap.empty, Some(warning)) + } + } + + /** * A callsite in the call graph. * * @param callsiteInstruction The invocation instruction @@ -197,21 +335,35 @@ class CallGraph[BT <: BTypes](val btypes: BT) { * @param callsitePosition The source position of the callsite, used for inliner warnings. */ final case class Callsite(callsiteInstruction: MethodInsnNode, callsiteMethod: MethodNode, callsiteClass: ClassBType, - callee: Either[OptimizerWarning, Callee], argInfos: List[ArgInfo], - callsiteStackHeight: Int, receiverKnownNotNull: Boolean, callsitePosition: Position) { + callee: Either[OptimizerWarning, Callee], argInfos: IntMap[ArgInfo], + callsiteStackHeight: Int, receiverKnownNotNull: Boolean, callsitePosition: Position, + annotatedInline: Boolean, annotatedNoInline: Boolean) { + /** + * Contains callsites that were created during inlining by cloning this callsite. Used to find + * corresponding callsites when inlining post-inline requests. + */ + val inlinedClones = mutable.Set.empty[ClonedCallsite] + + // an annotation at the callsite takes precedence over an annotation at the definition site + def isInlineAnnotated = annotatedInline || (callee.get.annotatedInline && !annotatedNoInline) + def isNoInlineAnnotated = annotatedNoInline || (callee.get.annotatedNoInline && !annotatedInline) + override def toString = "Invocation of" + s" ${callee.map(_.calleeDeclarationClass.internalName).getOrElse("?")}.${callsiteInstruction.name + callsiteInstruction.desc}" + s"@${callsiteMethod.instructions.indexOf(callsiteInstruction)}" + - s" in ${callsiteClass.internalName}.${callsiteMethod.name}" + s" in ${callsiteClass.internalName}.${callsiteMethod.name}${callsiteMethod.desc}" } + final case class ClonedCallsite(callsite: Callsite, clonedWhenInlining: Callsite) + /** * Information about invocation arguments, obtained through data flow analysis of the callsite method. */ sealed trait ArgInfo - final case class ArgTypeInfo(argType: BType, isPrecise: Boolean, knownNotNull: Boolean) extends ArgInfo + case object FunctionLiteral extends ArgInfo final case class ForwardedParam(index: Int) extends ArgInfo + // final case class ArgTypeInfo(argType: BType, isPrecise: Boolean, knownNotNull: Boolean) extends ArgInfo // can be extended, e.g., with constant types /** @@ -221,46 +373,50 @@ class CallGraph[BT <: BTypes](val btypes: BT) { * virtual calls, an override of the callee might be invoked. Also, * the callee can be abstract. * @param calleeDeclarationClass The class in which the callee is declared - * @param safeToInline True if the callee can be safely inlined: it cannot be overridden, - * and the inliner settings (project / global) allow inlining it. - * @param safeToRewrite True if the callee is the interface method of a concrete trait method - * that can be safely re-written to the static implementation method. + * @param isStaticallyResolved True if the callee cannot be overridden * @param annotatedInline True if the callee is annotated @inline * @param annotatedNoInline True if the callee is annotated @noinline + * @param samParamTypes A map from parameter positions to SAM parameter types * @param calleeInfoWarning An inliner warning if some information was not available while * gathering the information about this callee. */ - final case class Callee(callee: MethodNode, calleeDeclarationClass: ClassBType, - safeToInline: Boolean, safeToRewrite: Boolean, + final case class Callee(callee: MethodNode, calleeDeclarationClass: btypes.ClassBType, + isStaticallyResolved: Boolean, sourceFilePath: Option[String], annotatedInline: Boolean, annotatedNoInline: Boolean, + samParamTypes: IntMap[btypes.ClassBType], calleeInfoWarning: Option[CalleeInfoWarning]) { - assert(!(safeToInline && safeToRewrite), s"A callee of ${callee.name} can be either safeToInline or safeToRewrite, but not both.") + override def toString = s"Callee($calleeDeclarationClass.${callee.name})" + + def canInlineFromSource = inlinerHeuristics.canInlineFromSource(sourceFilePath) + def isAbstract = isAbstractMethod(callee) + def isSpecialMethod = isConstructor(callee) || isNativeMethod(callee) || hasCallerSensitiveAnnotation(callee) + + def safeToInline = isStaticallyResolved && canInlineFromSource && !isAbstract && !isSpecialMethod } - final case class ClosureInstantiation(lambdaMetaFactoryCall: LambdaMetaFactoryCall, ownerMethod: MethodNode, ownerClass: ClassBType) { + /** + * Metadata about a closure instantiation, stored in the call graph + * + * @param lambdaMetaFactoryCall the InvokeDynamic instruction + * @param ownerMethod the method where the closure is allocated + * @param ownerClass the class containing the above method + * @param capturedArgInfos information about captured arguments. Used for updating the call + * graph when re-writing a closure invocation to the body method. + */ + final case class ClosureInstantiation(lambdaMetaFactoryCall: LambdaMetaFactoryCall, ownerMethod: MethodNode, ownerClass: ClassBType, capturedArgInfos: IntMap[ArgInfo]) { + /** + * Contains closure instantiations that were created during inlining by cloning this instantiation. + */ + val inlinedClones = mutable.Set.empty[ClosureInstantiation] override def toString = s"ClosureInstantiation($lambdaMetaFactoryCall, ${ownerMethod.name + ownerMethod.desc}, $ownerClass)" } final case class LambdaMetaFactoryCall(indy: InvokeDynamicInsnNode, samMethodType: Type, implMethod: Handle, instantiatedMethodType: Type) object LambdaMetaFactoryCall { - private val lambdaMetaFactoryInternalName: InternalName = "java/lang/invoke/LambdaMetafactory" - - private val metafactoryHandle = { - val metafactoryMethodName: String = "metafactory" - val metafactoryDesc: String = "(Ljava/lang/invoke/MethodHandles$Lookup;Ljava/lang/String;Ljava/lang/invoke/MethodType;Ljava/lang/invoke/MethodType;Ljava/lang/invoke/MethodHandle;Ljava/lang/invoke/MethodType;)Ljava/lang/invoke/CallSite;" - new Handle(Opcodes.H_INVOKESTATIC, lambdaMetaFactoryInternalName, metafactoryMethodName, metafactoryDesc) - } - - private val altMetafactoryHandle = { - val altMetafactoryMethodName: String = "altMetafactory" - val altMetafactoryDesc: String = "(Ljava/lang/invoke/MethodHandles$Lookup;Ljava/lang/String;Ljava/lang/invoke/MethodType;[Ljava/lang/Object;)Ljava/lang/invoke/CallSite;" - new Handle(Opcodes.H_INVOKESTATIC, lambdaMetaFactoryInternalName, altMetafactoryMethodName, altMetafactoryDesc) - } - def unapply(insn: AbstractInsnNode): Option[(InvokeDynamicInsnNode, Type, Handle, Type)] = insn match { - case indy: InvokeDynamicInsnNode if indy.bsm == metafactoryHandle || indy.bsm == altMetafactoryHandle => + case indy: InvokeDynamicInsnNode if indy.bsm == coreBTypes.lambdaMetaFactoryMetafactoryHandle || indy.bsm == coreBTypes.lambdaMetaFactoryAltMetafactoryHandle => indy.bsmArgs match { - case Array(samMethodType: Type, implMethod: Handle, instantiatedMethodType: Type, xs@_*) => // xs binding because IntelliJ gets confused about _@_* + case Array(samMethodType: Type, implMethod: Handle, instantiatedMethodType: Type, _@_*) => // LambdaMetaFactory performs a number of automatic adaptations when invoking the lambda // implementation method (casting, boxing, unboxing, and primitive widening, see Javadoc). // @@ -284,7 +440,7 @@ class CallGraph[BT <: BTypes](val btypes: BT) { // When re-writing the closure callsite to the implMethod, we have to insert a cast. // // The check below ensures that - // (1) the implMethod type has the expected singature (captured types plus argument types + // (1) the implMethod type has the expected signature (captured types plus argument types // from instantiatedMethodType) // (2) the receiver of the implMethod matches the first captured type // (3) all parameters that are not the same in samMethodType and instantiatedMethodType diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala index b0dc6ead1b..2fca8991ab 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala @@ -8,21 +8,39 @@ package backend.jvm package opt import scala.annotation.switch -import scala.collection.immutable +import scala.collection.mutable +import scala.collection.immutable.IntMap import scala.reflect.internal.util.NoPosition import scala.tools.asm.{Type, Opcodes} import scala.tools.asm.tree._ import scala.tools.nsc.backend.jvm.BTypes.InternalName -import scala.tools.nsc.backend.jvm.analysis.ProdConsAnalyzer import BytecodeUtils._ import BackendReporting._ import Opcodes._ -import scala.tools.nsc.backend.jvm.opt.ByteCodeRepository.CompilationUnit -import scala.collection.convert.decorateAsScala._ +import scala.collection.JavaConverters._ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { import btypes._ import callGraph._ + import coreBTypes._ + import backendUtils._ + import ClosureOptimizer._ + + private object closureInitOrdering extends Ordering[ClosureInstantiation] { + override def compare(x: ClosureInstantiation, y: ClosureInstantiation): Int = { + val cls = x.ownerClass.internalName compareTo y.ownerClass.internalName + if (cls != 0) return cls + + val mName = x.ownerMethod.name compareTo y.ownerMethod.name + if (mName != 0) return mName + + val mDesc = x.ownerMethod.desc compareTo y.ownerMethod.desc + if (mDesc != 0) return mDesc + + def pos(inst: ClosureInstantiation) = inst.ownerMethod.instructions.indexOf(inst.lambdaMetaFactoryCall.indy) + pos(x) - pos(y) + } + } /** * If a closure is allocated and invoked within the same method, re-write the invocation to the @@ -54,55 +72,51 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { * [invoke the closure body method] */ def rewriteClosureApplyInvocations(): Unit = { - implicit object closureInitOrdering extends Ordering[ClosureInstantiation] { - override def compare(x: ClosureInstantiation, y: ClosureInstantiation): Int = { - val cls = x.ownerClass.internalName compareTo y.ownerClass.internalName - if (cls != 0) return cls - - val mName = x.ownerMethod.name compareTo y.ownerMethod.name - if (mName != 0) return mName - val mDesc = x.ownerMethod.desc compareTo y.ownerMethod.desc - if (mDesc != 0) return mDesc - - def pos(inst: ClosureInstantiation) = inst.ownerMethod.instructions.indexOf(inst.lambdaMetaFactoryCall.indy) - pos(x) - pos(y) - } + // sort all closure invocations to rewrite to ensure bytecode stability + val toRewrite = mutable.TreeMap.empty[ClosureInstantiation, mutable.ArrayBuffer[(MethodInsnNode, Int)]](closureInitOrdering) + def addRewrite(init: ClosureInstantiation, invocation: MethodInsnNode, stackHeight: Int): Unit = { + val callsites = toRewrite.getOrElseUpdate(init, mutable.ArrayBuffer.empty[(MethodInsnNode, Int)]) + callsites += ((invocation, stackHeight)) } - // Grouping the closure instantiations by method allows running the ProdConsAnalyzer only once per - // method. Also sort the instantiations: If there are multiple closure instantiations in a method, - // closure invocations need to be re-written in a consistent order for bytecode stability. The local - // variable slots for storing captured values depends on the order of rewriting. - val closureInstantiationsByMethod: Map[MethodNode, immutable.TreeSet[ClosureInstantiation]] = { - closureInstantiations.values.groupBy(_.ownerMethod).mapValues(immutable.TreeSet.empty ++ _) - } + // For each closure instantiation find callsites of the closure and add them to the toRewrite + // buffer (cannot change a method's bytecode while still looking for further invocations to + // rewrite, the frame indices of the ProdCons analysis would get out of date). If a callsite + // cannot be rewritten, for example because the lambda body method is not accessible, issue a + // warning. The `toList` in the next line prevents modifying closureInstantiations while + // iterating it: minimalRemoveUnreachableCode (called in the loop) removes elements. + for (method <- closureInstantiations.keysIterator.toList if AsmAnalyzer.sizeOKForBasicValue(method)) closureInstantiations.get(method) match { + case Some(closureInitsBeforeDCE) if closureInitsBeforeDCE.nonEmpty => + val ownerClass = closureInitsBeforeDCE.head._2.ownerClass.internalName + + // Advanced ProdCons queries (initialProducersForValueAt) expect no unreachable code. + localOpt.minimalRemoveUnreachableCode(method, ownerClass) + + if (AsmAnalyzer.sizeOKForSourceValue(method)) closureInstantiations.get(method) match { + case Some(closureInits) => + // A lazy val to ensure the analysis only runs if necessary (the value is passed by name to `closureCallsites`) + lazy val prodCons = new ProdConsAnalyzer(method, ownerClass) + + for (init <- closureInits.valuesIterator) closureCallsites(init, prodCons) foreach { + case Left(warning) => + backendReporting.inlinerWarning(warning.pos, warning.toString) + + case Right((invocation, stackHeight)) => + addRewrite(init, invocation, stackHeight) + } + + case _ => + } - // For each closure instantiation, a list of callsites of the closure that can be re-written - // If a callsite cannot be rewritten, for example because the lambda body method is not accessible, - // a warning is returned instead. - val callsitesToRewrite: List[(ClosureInstantiation, List[Either[RewriteClosureApplyToClosureBodyFailed, (MethodInsnNode, Int)]])] = { - closureInstantiationsByMethod.iterator.flatMap({ - case (methodNode, closureInits) => - // A lazy val to ensure the analysis only runs if necessary (the value is passed by name to `closureCallsites`) - lazy val prodCons = new ProdConsAnalyzer(methodNode, closureInits.head.ownerClass.internalName) - closureInits.iterator.map(init => (init, closureCallsites(init, prodCons))) - }).toList // mapping to a list (not a map) to keep the sorting of closureInstantiationsByMethod + case _ => } - // Rewrite all closure callsites (or issue inliner warnings for those that cannot be rewritten) - for ((closureInit, callsites) <- callsitesToRewrite) { + for ((closureInit, invocations) <- toRewrite) { // Local variables that hold the captured values and the closure invocation arguments. - // They are lazy vals to ensure that locals for captured values are only allocated if there's - // actually a callsite to rewrite (an not only warnings to be issued). - lazy val (localsForCapturedValues, argumentLocalsList) = localsForClosureRewrite(closureInit) - for (callsite <- callsites) callsite match { - case Left(warning) => - backendReporting.inlinerWarning(warning.pos, warning.toString) - - case Right((invocation, stackHeight)) => - rewriteClosureApplyInvocation(closureInit, invocation, stackHeight, localsForCapturedValues, argumentLocalsList) - } + val (localsForCapturedValues, argumentLocalsList) = localsForClosureRewrite(closureInit) + for ((invocation, stackHeight) <- invocations) + rewriteClosureApplyInvocation(closureInit, invocation, stackHeight, localsForCapturedValues, argumentLocalsList) } } @@ -122,20 +136,7 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { val argTypes = closureInit.lambdaMetaFactoryCall.samMethodType.getArgumentTypes val firstArgLocal = ownerMethod.maxLocals - // The comment in the unapply method of `LambdaMetaFactoryCall` explains why we have to introduce - // casts for arguments that have different types in samMethodType and instantiatedMethodType. - val castLoadTypes = { - val instantiatedMethodType = closureInit.lambdaMetaFactoryCall.instantiatedMethodType - (argTypes, instantiatedMethodType.getArgumentTypes).zipped map { - case (samArgType, instantiatedArgType) if samArgType != instantiatedArgType => - // the LambdaMetaFactoryCall extractor ensures that the two types are reference types, - // so we don't end up casting primitive values. - Some(instantiatedArgType) - case _ => - None - } - } - val argLocals = LocalsList.fromTypes(firstArgLocal, argTypes, castLoadTypes) + val argLocals = LocalsList.fromTypes(firstArgLocal, argTypes) ownerMethod.maxLocals = firstArgLocal + argLocals.size (captureLocals, argLocals) @@ -154,7 +155,7 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { // TODO: This is maybe over-cautious. // We are checking if the closure body method is accessible at the closure callsite. // If the closure allocation has access to the body method, then the callsite (in the same - // method as the alloction) should have access too. + // method as the allocation) should have access too. val bodyAccessible: Either[OptimizerWarning, Boolean] = for { (bodyMethodNode, declClass) <- byteCodeRepository.methodNode(lambdaBodyHandle.getOwner, lambdaBodyHandle.getName, lambdaBodyHandle.getDesc): Either[OptimizerWarning, (MethodNode, InternalName)] isAccessible <- inliner.memberIsAccessible(bodyMethodNode.access, classBTypeFromParsedClassfile(declClass), classBTypeFromParsedClassfile(lambdaBodyHandle.getOwner), ownerClass) @@ -162,7 +163,7 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { isAccessible } - def pos = callGraph.callsites.get(invocation).map(_.callsitePosition).getOrElse(NoPosition) + def pos = callGraph.callsites(ownerMethod).get(invocation).map(_.callsitePosition).getOrElse(NoPosition) val stackSize: Either[RewriteClosureApplyToClosureBodyFailed, Int] = bodyAccessible match { case Left(w) => Left(RewriteClosureAccessCheckFailed(pos, w)) case Right(false) => Left(RewriteClosureIllegalAccess(pos, ownerClass.internalName)) @@ -173,6 +174,28 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { }).toList } + /** + * Check whether `invocation` invokes the SAM of the IndyLambda `closureInit`. + * + * In addition to a perfect match, we also identify cases where a generic FunctionN is created + * but the invocation is to a specialized variant apply$sp... Vice-versa, we also allow the + * case where a specialized FunctionN$sp.. is created but the generic apply is invoked. In + * these cases, the translation will introduce the necessary box / unbox invocations. Example: + * + * val f: Int => Any = (x: Int) => 1 + * f(10) + * + * The IndyLambda creates a specialized `JFunction1$mcII$sp`, whose SAM is `apply$mcII$sp(I)I`. + * The invocation calls `apply(Object)Object`: the method name and type don't match. + * We identify these cases, insert the necessary unbox operation for the arguments, and invoke + * the `$anonfun(I)I` method. + * + * Tests in InlinerTest.optimizeSpecializedClosures. In that test, methods t4/t4a/t5/t8 show + * examples where the parameters have to be unboxed because generic `apply` is called, but the + * lambda body method takes primitive types. + * The opposite case is in t9: a the specialized `apply$sp..` is invoked, but the lambda body + * method takes boxed arguments, so we have to insert boxing operations. + */ private def isSamInvocation(invocation: MethodInsnNode, closureInit: ClosureInstantiation, prodCons: => ProdConsAnalyzer): Boolean = { val indy = closureInit.lambdaMetaFactoryCall.indy if (invocation.getOpcode == INVOKESTATIC) false @@ -187,11 +210,85 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { receiverProducers.size == 1 && receiverProducers.head == indy } - invocation.name == indy.name && { - val indySamMethodDesc = closureInit.lambdaMetaFactoryCall.samMethodType.getDescriptor - indySamMethodDesc == invocation.desc - } && - closureIsReceiver // most expensive check last + def isSpecializedVersion(specName: String, nonSpecName: String) = specName.startsWith(nonSpecName) && specializationSuffix.pattern.matcher(specName.substring(nonSpecName.length)).matches + + def sameOrSpecializedType(specTp: Type, nonSpecTp: Type) = { + specTp == nonSpecTp || { + val specDesc = specTp.getDescriptor + val nonSpecDesc = nonSpecTp.getDescriptor + specDesc.length == 1 && primitives.contains(specDesc) && nonSpecDesc == ObjectRef.descriptor + } + } + + def specializedDescMatches(specMethodDesc: String, nonSpecMethodDesc: String) = { + val specArgs = Type.getArgumentTypes(specMethodDesc) + val nonSpecArgs = Type.getArgumentTypes(nonSpecMethodDesc) + specArgs.corresponds(nonSpecArgs)(sameOrSpecializedType) && sameOrSpecializedType(Type.getReturnType(specMethodDesc), Type.getReturnType(nonSpecMethodDesc)) + } + + def nameAndDescMatch = { + val aName = invocation.name + val bName = indy.name + val aDesc = invocation.desc + val bDesc = closureInit.lambdaMetaFactoryCall.samMethodType.getDescriptor + if (aName == bName) aDesc == bDesc + else if (isSpecializedVersion(aName, bName)) specializedDescMatches(aDesc, bDesc) + else if (isSpecializedVersion(bName, aName)) specializedDescMatches(bDesc, aDesc) + else false + } + + nameAndDescMatch && closureIsReceiver // most expensive check last + } + } + + private def isPrimitiveType(asmType: Type) = { + val sort = asmType.getSort + Type.VOID <= sort && sort <= Type.DOUBLE + } + + /** + * The argument types of the lambda body method may differ in two ways from the argument types of + * the closure member method that is invoked (and replaced by a call to the body). + * - The lambda body method may have more specific types than the invoked closure member, see + * comment in [[LambdaMetaFactoryCall.unapply]]. + * - The invoked closure member might be a specialized variant of the SAM or vice-versa, see + * comment method [[isSamInvocation]]. + */ + private def adaptStoredArguments(closureInit: ClosureInstantiation, invocation: MethodInsnNode): Int => Option[AbstractInsnNode] = { + val invokeDesc = invocation.desc + // The lambda body method has additional parameters for captured values. Here we need to consider + // only those parameters of the body method that correspond to lambda parameters. This happens + // to be exactly LMF.instantiatedMethodType. In fact, `LambdaMetaFactoryCall.unapply` ensures + // that the body method signature is exactly (capturedParams + instantiatedMethodType). + val lambdaBodyMethodDescWithoutCaptures = closureInit.lambdaMetaFactoryCall.instantiatedMethodType.getDescriptor + if (invokeDesc == lambdaBodyMethodDescWithoutCaptures) { + _ => None + } else { + val invokeArgTypes = Type.getArgumentTypes(invokeDesc) + val implMethodArgTypes = Type.getArgumentTypes(lambdaBodyMethodDescWithoutCaptures) + val res = new Array[Option[AbstractInsnNode]](invokeArgTypes.length) + for (i <- invokeArgTypes.indices) { + if (invokeArgTypes(i) == implMethodArgTypes(i)) { + res(i) = None + } else if (isPrimitiveType(implMethodArgTypes(i)) && invokeArgTypes(i).getDescriptor == ObjectRef.descriptor) { + res(i) = Some(getScalaUnbox(implMethodArgTypes(i))) + } else if (isPrimitiveType(invokeArgTypes(i)) && implMethodArgTypes(i).getDescriptor == ObjectRef.descriptor) { + res(i) = Some(getScalaBox(invokeArgTypes(i))) + } else { + assert(!isPrimitiveType(invokeArgTypes(i)), invokeArgTypes(i)) + assert(!isPrimitiveType(implMethodArgTypes(i)), implMethodArgTypes(i)) + // The comment in the unapply method of `LambdaMetaFactoryCall` explains why we have to introduce + // casts for arguments that have different types in samMethodType and instantiatedMethodType. + // + // Note: + // - invokeArgTypes is the same as the argument types in the IndyLambda's samMethodType, + // this is ensured by the `isSamInvocation` filter in this file + // - implMethodArgTypes is the same as the arg types in the IndyLambda's instantiatedMethodType, + // this is ensured by the unapply method in LambdaMetaFactoryCall (file CallGraph) + res(i) = Some(new TypeInsnNode(CHECKCAST, implMethodArgTypes(i).getInternalName)) + } + } + res } } @@ -200,7 +297,7 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { val lambdaBodyHandle = closureInit.lambdaMetaFactoryCall.implMethod // store arguments - insertStoreOps(invocation, ownerMethod, argumentLocalsList) + insertStoreOps(invocation, ownerMethod, argumentLocalsList, adaptStoredArguments(closureInit, invocation)) // drop the closure from the stack ownerMethod.instructions.insertBefore(invocation, new InsnNode(POP)) @@ -210,8 +307,9 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { insertLoadOps(invocation, ownerMethod, argumentLocalsList) // update maxStack - val capturesStackSize = localsForCapturedValues.size - val invocationStackHeight = stackHeight + capturesStackSize - 1 // -1 because the closure is gone + // One slot per value is correct for long / double, see comment in the `analysis` package object. + val numCapturedValues = localsForCapturedValues.locals.length + val invocationStackHeight = stackHeight + numCapturedValues - 1 // -1 because the closure is gone if (invocationStackHeight > ownerMethod.maxStack) ownerMethod.maxStack = invocationStackHeight @@ -227,46 +325,75 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { insns.insertBefore(invocation, new InsnNode(DUP)) INVOKESPECIAL } - val isInterface = bodyOpcode == INVOKEINTERFACE - val bodyInvocation = new MethodInsnNode(bodyOpcode, lambdaBodyHandle.getOwner, lambdaBodyHandle.getName, lambdaBodyHandle.getDesc, isInterface) + val bodyInvocation = new MethodInsnNode(bodyOpcode, lambdaBodyHandle.getOwner, lambdaBodyHandle.getName, lambdaBodyHandle.getDesc, lambdaBodyHandle.isInterface) ownerMethod.instructions.insertBefore(invocation, bodyInvocation) - val returnType = Type.getReturnType(lambdaBodyHandle.getDesc) - fixLoadedNothingOrNullValue(returnType, bodyInvocation, ownerMethod, btypes) // see comment of that method + val bodyReturnType = Type.getReturnType(lambdaBodyHandle.getDesc) + val invocationReturnType = Type.getReturnType(invocation.desc) + if (isPrimitiveType(invocationReturnType) && bodyReturnType.getDescriptor == ObjectRef.descriptor) { + val op = + if (invocationReturnType.getSort == Type.VOID) getPop(1) + else getScalaUnbox(invocationReturnType) + ownerMethod.instructions.insertBefore(invocation, op) + } else if (isPrimitiveType(bodyReturnType) && invocationReturnType.getDescriptor == ObjectRef.descriptor) { + val op = + if (bodyReturnType.getSort == Type.VOID) getBoxedUnit + else getScalaBox(bodyReturnType) + ownerMethod.instructions.insertBefore(invocation, op) + } else { + // see comment of that method + fixLoadedNothingOrNullValue(bodyReturnType, bodyInvocation, ownerMethod, btypes) + } ownerMethod.instructions.remove(invocation) // update the call graph - val originalCallsite = callGraph.callsites.remove(invocation) + val originalCallsite = callGraph.removeCallsite(invocation, ownerMethod) // the method node is needed for building the call graph entry val bodyMethod = byteCodeRepository.methodNode(lambdaBodyHandle.getOwner, lambdaBodyHandle.getName, lambdaBodyHandle.getDesc) - def bodyMethodIsBeingCompiled = byteCodeRepository.classNodeAndSource(lambdaBodyHandle.getOwner).map(_._2 == CompilationUnit).getOrElse(false) - val bodyMethodCallsite = Callsite( - callsiteInstruction = bodyInvocation, - callsiteMethod = ownerMethod, - callsiteClass = closureInit.ownerClass, - callee = bodyMethod.map({ - case (bodyMethodNode, bodyMethodDeclClass) => Callee( + val sourceFilePath = byteCodeRepository.compilingClasses.get(lambdaBodyHandle.getOwner).map(_._2) + val callee = bodyMethod.map({ + case (bodyMethodNode, bodyMethodDeclClass) => + val bodyDeclClassType = classBTypeFromParsedClassfile(bodyMethodDeclClass) + Callee( callee = bodyMethodNode, - calleeDeclarationClass = classBTypeFromParsedClassfile(bodyMethodDeclClass), - safeToInline = compilerSettings.YoptInlineGlobal || bodyMethodIsBeingCompiled, - safeToRewrite = false, // the lambda body method is not a trait interface method + calleeDeclarationClass = bodyDeclClassType, + isStaticallyResolved = true, + sourceFilePath = sourceFilePath, annotatedInline = false, annotatedNoInline = false, + samParamTypes = callGraph.samParamTypes(bodyMethodNode, bodyDeclClassType), calleeInfoWarning = None) - }), - argInfos = Nil, + }) + val argInfos = closureInit.capturedArgInfos ++ originalCallsite.map(cs => cs.argInfos map { + case (index, info) => (index + numCapturedValues, info) + }).getOrElse(IntMap.empty) + val bodyMethodCallsite = Callsite( + callsiteInstruction = bodyInvocation, + callsiteMethod = ownerMethod, + callsiteClass = closureInit.ownerClass, + callee = callee, + argInfos = argInfos, callsiteStackHeight = invocationStackHeight, receiverKnownNotNull = true, // see below (*) - callsitePosition = originalCallsite.map(_.callsitePosition).getOrElse(NoPosition) + callsitePosition = originalCallsite.map(_.callsitePosition).getOrElse(NoPosition), + annotatedInline = false, + annotatedNoInline = false ) // (*) The documentation in class LambdaMetafactory says: // "if implMethod corresponds to an instance method, the first capture argument // (corresponding to the receiver) must be non-null" // Explanation: If the lambda body method is non-static, the receiver is a captured // value. It can only be captured within some instance method, so we know it's non-null. - callGraph.callsites(bodyInvocation) = bodyMethodCallsite + callGraph.addCallsite(bodyMethodCallsite) + + // Rewriting a closure invocation may render code unreachable. For example, the body method of + // (x: T) => ??? has return type Nothing$, and an ATHROW is added (see fixLoadedNothingOrNullValue). + unreachableCodeEliminated -= ownerMethod + + if (hasAdaptedImplMethod(closureInit) && inliner.canInlineCallsite(bodyMethodCallsite).isEmpty) + inliner.inlineCallsite(bodyMethodCallsite) } /** @@ -283,13 +410,10 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { // local. On the other hand, further optimizations (copy propagation, remove unused locals) will // clean it up. - // Captured variables don't need to be cast when loaded at the callsite (castLoadTypes are None). - // This is checked in `isClosureInstantiation`: the types of the captured variables in the indy - // instruction match exactly the corresponding parameter types in the body method. - val localsForCaptures = LocalsList.fromTypes(firstCaptureLocal, capturedTypes, castLoadTypes = _ => None) + val localsForCaptures = LocalsList.fromTypes(firstCaptureLocal, capturedTypes) closureInit.ownerMethod.maxLocals = firstCaptureLocal + localsForCaptures.size - insertStoreOps(indy, closureInit.ownerMethod, localsForCaptures) + insertStoreOps(indy, closureInit.ownerMethod, localsForCaptures, _ => None) insertLoadOps(indy, closureInit.ownerMethod, localsForCaptures) localsForCaptures @@ -301,8 +425,16 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { * * The lowest stack value is stored in the head of the locals list, so the last local is stored first. */ - private def insertStoreOps(before: AbstractInsnNode, methodNode: MethodNode, localsList: LocalsList) = - insertLocalValueOps(before, methodNode, localsList, store = true) + private def insertStoreOps(before: AbstractInsnNode, methodNode: MethodNode, localsList: LocalsList, beforeStore: Int => Option[AbstractInsnNode]) = { + // The first instruction needs to store into the last local of the `localsList`. + // To avoid reversing the list, we use `insert(previous)`. + val previous = before.getPrevious + def ins(op: AbstractInsnNode) = methodNode.instructions.insert(previous, op) + for ((l, i) <- localsList.locals.zipWithIndex) { + ins(new VarInsnNode(l.storeOpcode, l.local)) + beforeStore(i) foreach ins + } + } /** * Insert load operations in front of the `before` instruction to copy the local values denoted @@ -310,20 +442,10 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { * * The head of the locals list will be the lowest value on the stack, so the first local is loaded first. */ - private def insertLoadOps(before: AbstractInsnNode, methodNode: MethodNode, localsList: LocalsList) = - insertLocalValueOps(before, methodNode, localsList, store = false) - - private def insertLocalValueOps(before: AbstractInsnNode, methodNode: MethodNode, localsList: LocalsList, store: Boolean): Unit = { - // If `store` is true, the first instruction needs to store into the last local of the `localsList`. - // Load instructions on the other hand are emitted in the order of the list. - // To avoid reversing the list, we use `insert(previousInstr)` for stores and `insertBefore(before)` for loads. - lazy val previous = before.getPrevious + private def insertLoadOps(before: AbstractInsnNode, methodNode: MethodNode, localsList: LocalsList) = { for (l <- localsList.locals) { - val varOp = new VarInsnNode(if (store) l.storeOpcode else l.loadOpcode, l.local) - if (store) methodNode.instructions.insert(previous, varOp) - else methodNode.instructions.insertBefore(before, varOp) - if (!store) for (castType <- l.castLoadedValue) - methodNode.instructions.insert(varOp, new TypeInsnNode(CHECKCAST, castType.getInternalName)) + val op = new VarInsnNode(l.loadOpcode, l.local) + methodNode.instructions.insertBefore(before, op) } } @@ -345,12 +467,12 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { * Local(6, refOpOffset) :: * Nil */ - def fromTypes(firstLocal: Int, types: Array[Type], castLoadTypes: Int => Option[Type]): LocalsList = { + def fromTypes(firstLocal: Int, types: Array[Type]): LocalsList = { var sizeTwoOffset = 0 val locals: List[Local] = types.indices.map(i => { // The ASM method `type.getOpcode` returns the opcode for operating on a value of `type`. val offset = types(i).getOpcode(ILOAD) - ILOAD - val local = Local(firstLocal + i + sizeTwoOffset, offset, castLoadTypes(i)) + val local = Local(firstLocal + i + sizeTwoOffset, offset) if (local.size == 2) sizeTwoOffset += 1 local })(collection.breakOut) @@ -364,10 +486,15 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { * The xLOAD / xSTORE opcodes are in the following sequence: I, L, F, D, A, so the offset for * a local variable holding a reference (`A`) is 4. See also method `getOpcode` in [[scala.tools.asm.Type]]. */ - case class Local(local: Int, opcodeOffset: Int, castLoadedValue: Option[Type]) { + case class Local(local: Int, opcodeOffset: Int) { def size = if (loadOpcode == LLOAD || loadOpcode == DLOAD) 2 else 1 def loadOpcode = ILOAD + opcodeOffset def storeOpcode = ISTORE + opcodeOffset } } + +object ClosureOptimizer { + val primitives = "BSIJCFDZV" + val specializationSuffix = s"(\\$$mc[$primitives]+\\$$sp)".r +} diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/CopyProp.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/CopyProp.scala new file mode 100644 index 0000000000..518646812e --- /dev/null +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/CopyProp.scala @@ -0,0 +1,635 @@ +/* NSC -- new Scala compiler + * Copyright 2005-2014 LAMP/EPFL + * @author Martin Odersky + */ + +package scala.tools.nsc +package backend.jvm +package opt + +import scala.annotation.{switch, tailrec} +import scala.tools.asm.tree.analysis.BasicInterpreter +import scala.tools.asm.Type +import scala.tools.asm.Opcodes._ +import scala.tools.asm.tree._ +import scala.collection.mutable +import scala.collection.JavaConverters._ +import scala.tools.nsc.backend.jvm.BTypes.InternalName +import scala.tools.nsc.backend.jvm.analysis._ +import scala.tools.nsc.backend.jvm.opt.BytecodeUtils._ + +class CopyProp[BT <: BTypes](val btypes: BT) { + import btypes._ + import backendUtils._ + + + /** + * For every `xLOAD n`, find all local variable slots that are aliases of `n` using an + * AliasingAnalyzer and change the instruction to `xLOAD m` where `m` is the smallest alias. + * This leaves behind potentially stale `xSTORE n` instructions, which are then eliminated + * by [[eliminateStaleStores]]. + */ + def copyPropagation(method: MethodNode, owner: InternalName): Boolean = { + AsmAnalyzer.sizeOKForAliasing(method) && { + var changed = false + val numParams = parametersSize(method) + lazy val aliasAnalysis = new AsmAnalyzer(method, owner, new AliasingAnalyzer(new BasicInterpreter)) + + // Remember locals that are used in a `LOAD` instruction. Assume a program has two LOADs: + // + // ... + // LOAD 3 // aliases of 3 here: <3> + // ... + // LOAD 1 // aliases of 1 here: <1, 3> + // + // In this example, we should change the second load from 1 to 3, which might render the + // local variable 1 unused. + val knownUsed = new Array[Boolean](method.maxLocals) + + def usedOrMinAlias(it: IntIterator, init: Int): Int = { + if (knownUsed(init)) init + else { + var r = init + while (it.hasNext) { + val n = it.next() + // knownUsed.length is the number of locals, `n` may be a stack slot + if (n < knownUsed.length && knownUsed(n)) return n + if (n < r) r = n + } + r + } + } + + val it = method.instructions.iterator + while (it.hasNext) it.next() match { + case vi: VarInsnNode if vi.`var` >= numParams && isLoad(vi) => + val aliases = aliasAnalysis.frameAt(vi).asInstanceOf[AliasingFrame[_]].aliasesOf(vi.`var`) + if (aliases.size > 1) { + val alias = usedOrMinAlias(aliases.iterator, vi.`var`) + if (alias != -1) { + changed = true + vi.`var` = alias + } + } + knownUsed(vi.`var`) = true + + case _ => + } + + changed + } + } + + /** + * Eliminate `xSTORE` instructions that have no consumer. If the instruction can be completely + * eliminated, it is replaced by a POP. The [[eliminatePushPop]] cleans up unnecessary POPs. + * + * Note that an `ASOTRE` can not always be eliminated: it removes a reference to the object that + * is currently stored in that local, which potentially frees it for GC (SI-5313). Therefore + * we replace such stores by `POP; ACONST_NULL; ASTORE x`. + */ + def eliminateStaleStores(method: MethodNode, owner: InternalName): Boolean = { + AsmAnalyzer.sizeOKForSourceValue(method) && { + lazy val prodCons = new ProdConsAnalyzer(method, owner) + def hasNoCons(varIns: AbstractInsnNode, slot: Int) = prodCons.consumersOfValueAt(varIns.getNext, slot).isEmpty + + // insns to delete: IINC that have no consumer + val toDelete = mutable.ArrayBuffer.empty[IincInsnNode] + + // xSTORE insns to be replaced by POP or POP2 + val storesToDrop = mutable.ArrayBuffer.empty[VarInsnNode] + + // ASTORE insn that have no consumer. + // - if the local is not live, the store is replaced by POP + // - otherwise, pop the argument value and store NULL instead. Unless the boolean field is + // `true`: then the store argument is already known to be ACONST_NULL. + val toNullOut = mutable.ArrayBuffer.empty[(VarInsnNode, Boolean)] + + // `true` for variables that are known to be live + val liveVars = new Array[Boolean](method.maxLocals) + + val it = method.instructions.iterator + while (it.hasNext) it.next() match { + case vi: VarInsnNode if isStore(vi) && hasNoCons(vi, vi.`var`) => + val canElim = vi.getOpcode != ASTORE || { + val currentFieldValueProds = prodCons.initialProducersForValueAt(vi, vi.`var`) + currentFieldValueProds.size == 1 && (currentFieldValueProds.head match { + case ParameterProducer(0) => !isStaticMethod(method) // current field value is `this`, which won't be gc'd anyway + case _: UninitializedLocalProducer => true // field is not yet initialized, so current value cannot leak + case _ => false + }) + } + if (canElim) storesToDrop += vi + else { + val prods = prodCons.producersForValueAt(vi, prodCons.frameAt(vi).stackTop) + val isStoreNull = prods.size == 1 && prods.head.getOpcode == ACONST_NULL + toNullOut += ((vi, isStoreNull)) + } + + case ii: IincInsnNode if hasNoCons(ii, ii.`var`) => + toDelete += ii + + case vi: VarInsnNode => + liveVars(vi.`var`) = true + + case ii: IincInsnNode => + liveVars(ii.`var`) = true + + case _ => + } + + def replaceByPop(vi: VarInsnNode): Unit = { + val size = if (isSize2LoadOrStore(vi.getOpcode)) 2 else 1 + method.instructions.set(vi, getPop(size)) + } + + toDelete foreach method.instructions.remove + + storesToDrop foreach replaceByPop + + for ((vi, isStoreNull) <- toNullOut) { + if (!liveVars(vi.`var`)) replaceByPop(vi) // can drop `ASTORE x` where x has only dead stores + else { + if (!isStoreNull) { + val prev = vi.getPrevious + method.instructions.insert(prev, new InsnNode(ACONST_NULL)) + method.instructions.insert(prev, getPop(1)) + } + } + } + + toDelete.nonEmpty || storesToDrop.nonEmpty || toNullOut.nonEmpty + } + } + + /** + * When a POP instruction has a single producer, remove the POP and eliminate the producer by + * bubbling up the POPs. For example, given + * ILOAD 1; ILOAD 2; IADD; POP + * we first eliminate the POP, then the IADD, then its inputs, so the entire sequence goes away. + * If a producer cannot be eliminated (need to keep side-effects), a POP is inserted. + * + * A special case eliminates the creation of unused objects with side-effect-free constructors: + * NEW scala/Tuple1; DUP; ALOAD 0; INVOKESPECIAL scala/Tuple1.<init>; POP + * The POP has a single producer (the DUP), it's easy to eliminate these two. A special case + * is needed to eliminate the INVOKESPECIAL and NEW. + */ + def eliminatePushPop(method: MethodNode, owner: InternalName): Boolean = { + AsmAnalyzer.sizeOKForSourceValue(method) && { + // A queue of instructions producing a value that has to be eliminated. If possible, the + // instruction (and its inputs) will be removed, otherwise a POP is inserted after + val queue = mutable.Queue.empty[ProducedValue] + // Contains constructor invocations for values that can be eliminated if unused. + val sideEffectFreeConstructorCalls = mutable.ArrayBuffer.empty[MethodInsnNode] + + // instructions to remove (we don't change the bytecode while analyzing it. this allows + // running the ProdConsAnalyzer only once.) + val toRemove = mutable.Set.empty[AbstractInsnNode] + // instructions to insert before some instruction + val toInsertBefore = mutable.Map.empty[AbstractInsnNode, List[InsnNode]] + // an instruction to insert after some instruction + val toInsertAfter = mutable.Map.empty[AbstractInsnNode, AbstractInsnNode] + + lazy val prodCons = new ProdConsAnalyzer(method, owner) + + /** + * Returns the producers for the stack value `inputSlot` consumed by `cons`, if the consumer + * instruction is the only consumer for all of these producers. + * + * If a producer has multiple consumers, or the value is the caught exception in a catch + * block, this method returns Set.empty. + */ + def producersIfSingleConsumer(cons: AbstractInsnNode, inputSlot: Int): Set[AbstractInsnNode] = { + /** + * True if the values produced by `prod` are all the same. Most instructions produce a single + * value. DUP and DUP2 (with a size-2 input) produce two equivalent values. However, there + * are some exotic instructions that produce multiple non-equal values (DUP_X1, SWAP, ...). + * + * Assume we have `DUP_X2; POP`. In order to remove the `POP` we need to change the DUP_X2 + * into something else, which is not straightforward. + * + * Since scalac never emits any of those exotic bytecodes, we don't optimize them. + */ + def producerHasSingleOutput(prod: AbstractInsnNode): Boolean = prod match { + case _: ExceptionProducer[_] | _: UninitializedLocalProducer => + // POP of an exception in a catch block cannot be removed. For an uninitialized local, + // there should not be a consumer. We are conservative and include it here, so the + // producer would not be removed. + false + + case _: ParameterProducer => + true + + case _ => (prod.getOpcode: @switch) match { + case DUP => true + case DUP2 => prodCons.frameAt(prod).peekStack(0).getSize == 2 + case _ => InstructionStackEffect.prod(InstructionStackEffect.forAsmAnalysis(prod, prodCons.frameAt(prod))) == 1 + } + } + + val prods = prodCons.producersForValueAt(cons, inputSlot) + val singleConsumer = prods forall { prod => + producerHasSingleOutput(prod) && { + // for DUP / DUP2, we only consider the value that is actually consumed by cons + val conss = prodCons.consumersOfValueAt(prod.getNext, inputSlot) + conss.size == 1 && conss.head == cons + } + } + if (singleConsumer) prods else Set.empty + } + + /** + * For a POP instruction that is the single consumer of its producers, remove the POP and + * enqueue the producers. + */ + def handleInitialPop(pop: AbstractInsnNode): Unit = { + val prods = producersIfSingleConsumer(pop, prodCons.frameAt(pop).stackTop) + if (prods.nonEmpty) { + toRemove += pop + val size = if (pop.getOpcode == POP2) 2 else 1 + queue ++= prods.map(ProducedValue(_, size)) + } + } + + /** + * Traverse the method in its initial state and collect all POP instructions and side-effect + * free constructor invocations that can be eliminated. + */ + def collectInitialPopsAndPureConstrs(): Unit = { + val it = method.instructions.iterator + while (it.hasNext) { + val insn = it.next() + (insn.getOpcode: @switch) match { + case POP | POP2 => + handleInitialPop(insn) + + case INVOKESPECIAL => + val mi = insn.asInstanceOf[MethodInsnNode] + if (isSideEffectFreeConstructorCall(mi)) sideEffectFreeConstructorCalls += mi + + case _ => + } + } + } + + /** + * Eliminate the `numArgs` inputs of the instruction `prod` (which was eliminated). For + * each input value + * - if the `prod` instruction is the single consumer, enqueue the producers of the input + * - otherwise, insert a POP instruction to POP the input value + */ + def handleInputs(prod: AbstractInsnNode, numArgs: Int): Unit = { + val frame = prodCons.frameAt(prod) + val pops = mutable.ListBuffer.empty[InsnNode] + @tailrec def handle(stackOffset: Int): Unit = { + if (stackOffset >= 0) { + val prods = producersIfSingleConsumer(prod, frame.stackTop - stackOffset) + val nSize = frame.peekStack(stackOffset).getSize + if (prods.isEmpty) pops append getPop(nSize) + else queue ++= prods.map(ProducedValue(_, nSize)) + handle(stackOffset - 1) + } + } + handle(numArgs - 1) // handle stack offsets (numArgs - 1) to 0 + if (pops.nonEmpty) toInsertBefore(prod) = pops.toList + } + + /** + * Eliminate LMF `indy` and its inputs. + */ + def handleClosureInst(indy: InvokeDynamicInsnNode): Unit = { + toRemove += indy + callGraph.removeClosureInstantiation(indy, method) + handleInputs(indy, Type.getArgumentTypes(indy.desc).length) + } + + def runQueue(): Unit = while (queue.nonEmpty) { + val ProducedValue(prod, size) = queue.dequeue() + + def prodString = s"Producer ${AsmUtils textify prod}@${method.instructions.indexOf(prod)}\n${AsmUtils textify method}" + def popAfterProd(): Unit = toInsertAfter(prod) = getPop(size) + + (prod.getOpcode: @switch) match { + case ACONST_NULL | ICONST_M1 | ICONST_0 | ICONST_1 | ICONST_2 | ICONST_3 | ICONST_4 | ICONST_5 | LCONST_0 | LCONST_1 | FCONST_0 | FCONST_1 | FCONST_2 | DCONST_0 | DCONST_1 | + BIPUSH | SIPUSH | ILOAD | LLOAD | FLOAD | DLOAD | ALOAD=> + toRemove += prod + + case opc @ (DUP | DUP2) => + assert(opc != 2 || size == 2, s"DUP2 for two size-1 values; $prodString") // ensured in method `producerHasSingleOutput` + if (toRemove(prod)) + // the DUP is already scheduled for removal because one of its consumers is a POP. + // now the second consumer is also a POP, so we need to eliminate the DUP's input. + handleInputs(prod, 1) + else + toRemove += prod + + case DUP_X1 | DUP_X2 | DUP2_X1 | DUP2_X2 | SWAP => + // these are excluded in method `producerHasSingleOutput` + assert(false, s"Cannot eliminate value pushed by an instruction with multiple output values; $prodString") + + case IDIV | LDIV | IREM | LREM => + popAfterProd() // keep potential division by zero + + case IADD | LADD | FADD | DADD | ISUB | LSUB | FSUB | DSUB | IMUL | LMUL | FMUL | DMUL | FDIV | DDIV | FREM | DREM | + LSHL | LSHR | LUSHR | + IAND | IOR | IXOR | LAND | LOR | LXOR | + LCMP | FCMPL | FCMPG | DCMPL | DCMPG => + toRemove += prod + handleInputs(prod, 2) + + case INEG | LNEG | FNEG | DNEG | + I2L | I2F | I2D | L2I | L2F | L2D | F2I | F2L | F2D | D2I | D2L | D2F | I2B | I2C | I2S => + toRemove += prod + handleInputs(prod, 1) + + case GETFIELD | GETSTATIC => + // TODO eliminate side-effect free module loads (https://github.com/scala/scala-dev/issues/16) + if (isBoxedUnit(prod)) toRemove += prod + else popAfterProd() // keep potential class initialization (static field) or NPE (instance field) + + case INVOKEVIRTUAL | INVOKESPECIAL | INVOKESTATIC | INVOKEINTERFACE => + val methodInsn = prod.asInstanceOf[MethodInsnNode] + if (isSideEffectFreeCall(methodInsn)) { + toRemove += prod + callGraph.removeCallsite(methodInsn, method) + val receiver = if (methodInsn.getOpcode == INVOKESTATIC) 0 else 1 + handleInputs(prod, Type.getArgumentTypes(methodInsn.desc).length + receiver) + } else + popAfterProd() + + case INVOKEDYNAMIC => + prod match { + case callGraph.LambdaMetaFactoryCall(indy, _, _, _) => handleClosureInst(indy) + case _ => popAfterProd() + } + + case NEW => + if (isNewForSideEffectFreeConstructor(prod)) toRemove += prod + else popAfterProd() + + case LDC => prod.asInstanceOf[LdcInsnNode].cst match { + case _: java.lang.Integer | _: java.lang.Float | _: java.lang.Long | _: java.lang.Double | _: String => + toRemove += prod + + case _ => + // don't remove class literals, method types, method handles: keep a potential NoClassDefFoundError + popAfterProd() + } + + case MULTIANEWARRAY => + toRemove += prod + handleInputs(prod, prod.asInstanceOf[MultiANewArrayInsnNode].dims) + + case _ => + popAfterProd() + } + } + + // there are two cases when we can eliminate a constructor call: + // - NEW T; INVOKESPECIAL T.<init> -- there's no DUP, the new object is consumed only by the constructor) + // - NEW T; DUP; INVOKESPECIAL T.<init>, where the DUP will be removed + def eliminateUnusedPureConstructorCalls(): Boolean = { + var changed = false + + def removeConstructorCall(mi: MethodInsnNode): Unit = { + toRemove += mi + callGraph.removeCallsite(mi, method) + sideEffectFreeConstructorCalls -= mi + changed = true + } + + for (mi <- sideEffectFreeConstructorCalls.toList) { // toList to allow removing elements while traversing + val frame = prodCons.frameAt(mi) + val stackTop = frame.stackTop + val numArgs = Type.getArgumentTypes(mi.desc).length + val receiverProds = producersIfSingleConsumer(mi, stackTop - numArgs) + if (receiverProds.size == 1) { + val receiverProd = receiverProds.head + if (receiverProd.getOpcode == NEW) { + removeConstructorCall(mi) + handleInputs(mi, numArgs + 1) // removes the producers of args and receiver + } else if (receiverProd.getOpcode == DUP && toRemove.contains(receiverProd)) { + val dupProds = producersIfSingleConsumer(receiverProd, prodCons.frameAt(receiverProd).stackTop) + if (dupProds.size == 1 && dupProds.head.getOpcode == NEW) { + removeConstructorCall(mi) + handleInputs(mi, numArgs) // removes the producers of args. the producer of the receiver is DUP and already in toRemove. + queue += ProducedValue(dupProds.head, 1) // removes the NEW (which is NOT the producer of the receiver!) + } + } + } + } + changed + } + + collectInitialPopsAndPureConstrs() + + // eliminating producers enables eliminating unused constructor calls (when a DUP gets removed). + // vice-versa, eliminating a constructor call adds producers of constructor parameters to the queue. + // so the two run in a loop. + runQueue() + while (eliminateUnusedPureConstructorCalls()) + runQueue() + + var changed = false + toInsertAfter foreach { + case (target, insn) => + nextExecutableInstructionOrLabel(target) match { + // `insn` is of type `InsnNode`, so we only need to check the Opcode when comparing to another instruction + case Some(next) if next.getOpcode == insn.getOpcode && toRemove(next) => + // Inserting and removing a POP at the same place should not enable `changed`. This happens + // when a POP directly follows a producer that cannot be eliminated, e.g. INVOKESTATIC A.m ()I; POP + // The POP is initially added to `toRemove`, and the `INVOKESTATIC` producer is added to the queue. + // Because the producer cannot be elided, a POP is added to `toInsertAfter`. + toRemove -= next + + case _ => + changed = true + method.instructions.insert(target, insn) + } + } + toInsertBefore foreach { + case (target, insns) => + changed = true + insns.foreach(method.instructions.insertBefore(target, _)) + } + toRemove foreach { insn => + changed = true + method.instructions.remove(insn) + } + changed + } + } + + case class ProducedValue(producer: AbstractInsnNode, size: Int) { + override def toString = s"<${AsmUtils textify producer}>" + } + + /** + * Remove `xSTORE n; xLOAD n` pairs if + * - the local variable n is not used anywhere else in the method (1), and + * - there are no executable instructions and no live labels (jump targets) between the two (2) + * + * Note: store-load pairs that cannot be eliminated could be replaced by `DUP; xSTORE n`, but + * that's just cosmetic and doesn't help for anything. + * + * (1) This could be made more precise by running a prodCons analysis and checking that the load + * is the only user of the store. Then we could eliminate the pair even if the variable is live + * (except for ASTORE, SI-5313). Not needing an analyzer is more efficient, and catches most + * cases. + * + * (2) The implementation uses a conservative estimation for liveness (if some instruction uses + * local n, then n is considered live in the entire method). In return, it doesn't need to run an + * Analyzer on the method, making it more efficient. + * + * This method also removes `ACONST_NULL; ASTORE n` if the local n is not live. This pattern is + * introduced by [[eliminateStaleStores]]. + * + * The implementation is a little tricky to support the following case: + * ISTORE 1; ISTORE 2; ILOAD 2; ACONST_NULL; ASTORE 3; ILOAD 1 + * The outer store-load pair can be removed if two the inner pairs can be. + */ + def eliminateStoreLoad(method: MethodNode): Boolean = { + val removePairs = mutable.Set.empty[RemovePair] + val liveVars = new Array[Boolean](method.maxLocals) + val liveLabels = mutable.Set.empty[LabelNode] + + def mkRemovePair(store: VarInsnNode, other: AbstractInsnNode, depends: List[RemovePairDependency]): RemovePair = { + val r = RemovePair(store, other, depends) + removePairs += r + r + } + + def registerLiveVarsLabels(insn: AbstractInsnNode): Unit = insn match { + case vi: VarInsnNode => liveVars(vi.`var`) = true + case ii: IincInsnNode => liveVars(ii.`var`) = true + case j: JumpInsnNode => liveLabels += j.label + case s: TableSwitchInsnNode => liveLabels += s.dflt; liveLabels ++= s.labels.asScala + case s: LookupSwitchInsnNode => liveLabels += s.dflt; liveLabels ++= s.labels.asScala + case _ => + } + + val pairStartStack = new mutable.Stack[(AbstractInsnNode, mutable.ListBuffer[RemovePairDependency])] + + def push(insn: AbstractInsnNode) = { + pairStartStack push ((insn, mutable.ListBuffer.empty)) + } + + def addDepends(dependency: RemovePairDependency) = if (pairStartStack.nonEmpty) { + val (_, depends) = pairStartStack.top + depends += dependency + } + + def completesStackTop(load: AbstractInsnNode) = isLoad(load) && pairStartStack.nonEmpty && { + pairStartStack.top match { + case (store: VarInsnNode, _) => store.`var` == load.asInstanceOf[VarInsnNode].`var` + case _ => false + } + } + + /** + * Try to pair `insn` with its correspondent on the stack + * - if the stack top is a store and `insn` is a corresponding load, create a pair + * - otherwise, check the two top stack values for `null; store`. if it matches, create + * a pair and continue pairing `insn` on the remaining stack + * - otherwise, empty the stack and mark the local variables in it live + */ + def tryToPairInstruction(insn: AbstractInsnNode): Unit = { + @tailrec def emptyStack(): Unit = if (pairStartStack.nonEmpty) { + registerLiveVarsLabels(pairStartStack.pop()._1) + emptyStack() + } + + @tailrec def tryPairing(): Unit = { + if (completesStackTop(insn)) { + val (store: VarInsnNode, depends) = pairStartStack.pop() + addDepends(mkRemovePair(store, insn, depends.toList)) + } else if (pairStartStack.nonEmpty) { + val (top, topDepends) = pairStartStack.pop() + if (pairStartStack.nonEmpty) { + (pairStartStack.top, top) match { + case ((ldNull: InsnNode, depends), store: VarInsnNode) if ldNull.getOpcode == ACONST_NULL && store.getOpcode == ASTORE => + pairStartStack.pop() + addDepends(mkRemovePair(store, ldNull, depends.toList)) + // example: store; (null; store;) (store; load;) load + // s1^ ^^^^^p1^^^^^ // p1 is added to s1's depends + // then: store; (null; store;) load + // s2^ ^^^^p2^^^^^ // p1 and p2 are added to s2's depends + topDepends foreach addDepends + tryPairing() + + case _ => + // empty the stack - a non-matching insn was found, cannot create any pairs to remove + registerLiveVarsLabels(insn) + registerLiveVarsLabels(top) + emptyStack() + } + } else { + // stack only has one element + registerLiveVarsLabels(insn) + registerLiveVarsLabels(top) + } + } else { + // stack is empty already + registerLiveVarsLabels(insn) + } + } + + tryPairing() + } + + + var insn = method.instructions.getFirst + + @tailrec def advanceToNextExecutableOrLabel(): Unit = { + insn = insn.getNext + if (insn != null && !isExecutable(insn) && !insn.isInstanceOf[LabelNode]) advanceToNextExecutableOrLabel() + } + + while (insn != null) { + insn match { + case _ if insn.getOpcode == ACONST_NULL => push(insn) + case vi: VarInsnNode if isStore(vi) => push(insn) + case label: LabelNode if pairStartStack.nonEmpty => addDepends(LabelNotLive(label)) + case _ => tryToPairInstruction(insn) + } + advanceToNextExecutableOrLabel() + } + + // elide RemovePairs that depend on live labels or other RemovePair that have to be elided. + // example: store 1; store 2; label x; load 2; load 1 + // if x is live, the inner pair has to be elided, causing the outer pair to be elided too. + + var doneEliding = false + + def elide(removePair: RemovePair) = { + doneEliding = false + liveVars(removePair.store.`var`) = true + removePairs -= removePair + } + + while (!doneEliding) { + doneEliding = true + for (removePair <- removePairs.toList) { + val slot = removePair.store.`var` + if (liveVars(slot)) elide(removePair) + else removePair.depends foreach { + case LabelNotLive(label) => if (liveLabels(label)) elide(removePair) + case other: RemovePair => if (!removePairs(other)) elide(removePair) + } + } + } + + for (removePair <- removePairs) { + method.instructions.remove(removePair.store) + method.instructions.remove(removePair.other) + } + + removePairs.nonEmpty + } +} + +trait RemovePairDependency +case class RemovePair(store: VarInsnNode, other: AbstractInsnNode, depends: List[RemovePairDependency]) extends RemovePairDependency { + override def toString = s"<${AsmUtils textify store},${AsmUtils textify other}> [$depends]" +} +case class LabelNotLive(label: LabelNode) extends RemovePairDependency diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/InlineInfoAttribute.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/InlineInfoAttribute.scala index e7dd5abc57..7bc4ea2392 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/InlineInfoAttribute.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/InlineInfoAttribute.scala @@ -27,7 +27,7 @@ import scala.tools.nsc.backend.jvm.BackendReporting.UnknownScalaInlineInfoVersio * In principle we could encode the InlineInfo into a Java annotation (instead of a classfile attribute). * However, an attribute allows us to save many bits. In particular, note that the strings in an * InlineInfo are serialized as references to constants in the constant pool, and those strings - * (traitImplClassSelfType, method names, method signatures) would exist in there anyway. So the + * (method names, method signatures) would exist in there anyway. So the * ScalaInlineAttribute remains relatively compact. */ case class InlineInfoAttribute(inlineInfo: InlineInfo) extends Attribute(InlineInfoAttribute.attributeName) { @@ -47,13 +47,16 @@ case class InlineInfoAttribute(inlineInfo: InlineInfo) extends Attribute(InlineI result.putByte(InlineInfoAttribute.VERSION) - var hasSelfIsFinal = 0 - if (inlineInfo.isEffectivelyFinal) hasSelfIsFinal |= 1 - if (inlineInfo.traitImplClassSelfType.isDefined) hasSelfIsFinal |= 2 - result.putByte(hasSelfIsFinal) + var flags = 0 + if (inlineInfo.isEffectivelyFinal) flags |= 1 + // flags |= 2 // no longer written + if (inlineInfo.sam.isDefined) flags |= 4 + result.putByte(flags) - for (selfInternalName <- inlineInfo.traitImplClassSelfType) { - result.putShort(cw.newUTF8(selfInternalName)) + for (samNameDesc <- inlineInfo.sam) { + val (name, desc) = samNameDesc.span(_ != '(') + result.putShort(cw.newUTF8(name)) + result.putShort(cw.newUTF8(desc)) } // The method count fits in a short (the methods_count in a classfile is also a short) @@ -68,10 +71,10 @@ case class InlineInfoAttribute(inlineInfo: InlineInfo) extends Attribute(InlineI result.putShort(cw.newUTF8(desc)) var inlineInfo = 0 - if (info.effectivelyFinal) inlineInfo |= 1 - if (info.traitMethodWithStaticImplementation) inlineInfo |= 2 - if (info.annotatedInline) inlineInfo |= 4 - if (info.annotatedNoInline) inlineInfo |= 8 + if (info.effectivelyFinal) inlineInfo |= 1 + // inlineInfo |= 2 // no longer written + if (info.annotatedInline) inlineInfo |= 4 + if (info.annotatedNoInline) inlineInfo |= 8 result.putByte(inlineInfo) } @@ -79,7 +82,7 @@ case class InlineInfoAttribute(inlineInfo: InlineInfo) extends Attribute(InlineI } /** - * De-serialize the attribute into an InlineInfo. The attribute starts at cr.b(off), but we don't + * Deserialize the attribute into an InlineInfo. The attribute starts at cr.b(off), but we don't * need to access that array directly, we can use the `read` methods provided by the ClassReader. * * `buf` is a pre-allocated character array that is guaranteed to be long enough to hold any @@ -94,15 +97,17 @@ case class InlineInfoAttribute(inlineInfo: InlineInfo) extends Attribute(InlineI val version = nextByte() if (version == 1) { - val hasSelfIsFinal = nextByte() - val isFinal = (hasSelfIsFinal & 1) != 0 - val hasSelf = (hasSelfIsFinal & 2) != 0 - - val self = if (hasSelf) { - val selfName = nextUTF8() - Some(selfName) - } else { - None + val flags = nextByte() + val isFinal = (flags & 1) != 0 + val hasSelf = (flags & 2) != 0 + val hasSam = (flags & 4) != 0 + + if (hasSelf) nextUTF8() // no longer used + + val sam = if (!hasSam) None else { + val name = nextUTF8() + val desc = nextUTF8() + Some(name + desc) } val numEntries = nextShort() @@ -111,14 +116,15 @@ case class InlineInfoAttribute(inlineInfo: InlineInfo) extends Attribute(InlineI val desc = nextUTF8() val inlineInfo = nextByte() - val isFinal = (inlineInfo & 1) != 0 - val traitMethodWithStaticImplementation = (inlineInfo & 2) != 0 - val isInline = (inlineInfo & 4) != 0 - val isNoInline = (inlineInfo & 8) != 0 - (name + desc, MethodInlineInfo(isFinal, traitMethodWithStaticImplementation, isInline, isNoInline)) + val isFinal = (inlineInfo & 1) != 0 + // = (inlineInfo & 2) != 0 // no longer used + val isInline = (inlineInfo & 4) != 0 + val isNoInline = (inlineInfo & 8) != 0 + (name + desc, MethodInlineInfo(isFinal, isInline, isNoInline)) }).toMap - InlineInfoAttribute(InlineInfo(self, isFinal, infos, None)) + val info = InlineInfo(isFinal, sam, infos, None) + InlineInfoAttribute(info) } else { val msg = UnknownScalaInlineInfoVersion(cr.getClassName, version) InlineInfoAttribute(BTypes.EmptyInlineInfo.copy(warning = Some(msg))) @@ -128,9 +134,18 @@ case class InlineInfoAttribute(inlineInfo: InlineInfo) extends Attribute(InlineI object InlineInfoAttribute { /** + * Notes: + * - `traitImplClassSelfType` is no longer emitted, `hasTraitImplClassSelfType` is always emitted + * as 0. Similarly, `traitMethodWithStaticImplementation` is always emitted 0. + * - When reading an existing attribute where `hasTraitImplClassSelfType` is 1, the + * `traitImplClassSelfType` is ignored. Also the value of `traitMethodWithStaticImplementation` + * is ignored. + * * [u1] version - * [u1] isEffectivelyFinal (<< 0), hasTraitImplClassSelfType (<< 1) + * [u1] isEffectivelyFinal (<< 0), hasTraitImplClassSelfType (<< 1), hasSam (<< 2), hasLateInterfaces (<< 3) * [u2]? traitImplClassSelfType (reference) + * [u2]? samName (reference) + * [u2]? samDescriptor (reference) * [u2] numMethodEntries * [u2] name (reference) * [u2] descriptor (reference) @@ -142,7 +157,7 @@ object InlineInfoAttribute { } /** - * In order to instruct the ASM framework to de-serialize the ScalaInlineInfo attribute, we need + * In order to instruct the ASM framework to deserialize the ScalaInlineInfo attribute, we need * to pass a prototype instance when running the class reader. */ -object InlineInfoAttributePrototype extends InlineInfoAttribute(InlineInfo(null, false, null, null)) +object InlineInfoAttributePrototype extends InlineInfoAttribute(InlineInfo(false, null, null, null)) diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala index 6b2786c1a3..1c29859f46 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala @@ -9,59 +9,125 @@ package opt import scala.annotation.tailrec import scala.tools.asm -import asm.Handle import asm.Opcodes._ import asm.tree._ -import scala.collection.convert.decorateAsScala._ -import scala.collection.convert.decorateAsJava._ +import scala.collection.JavaConverters._ import AsmUtils._ import BytecodeUtils._ import collection.mutable -import scala.tools.asm.tree.analysis.SourceInterpreter import BackendReporting._ import scala.tools.nsc.backend.jvm.BTypes.InternalName class Inliner[BT <: BTypes](val btypes: BT) { import btypes._ import callGraph._ + import inlinerHeuristics._ + import backendUtils._ - def eliminateUnreachableCodeAndUpdateCallGraph(methodNode: MethodNode, definingClass: InternalName): Unit = { - localOpt.minimalRemoveUnreachableCode(methodNode, definingClass) foreach { - case invocation: MethodInsnNode => callGraph.callsites.remove(invocation) - case indy: InvokeDynamicInsnNode => callGraph.closureInstantiations.remove(indy) - case _ => + sealed trait InlineLog { + def request: InlineRequest + } + final case class InlineLogSuccess(request: InlineRequest, sizeBefore: Int, sizeInlined: Int) extends InlineLog { + var downstreamLog: mutable.Buffer[InlineLog] = mutable.ListBuffer.empty + } + final case class InlineLogFail(request: InlineRequest, warning: CannotInlineWarning) extends InlineLog + final case class InlineLogRollback(request: InlineRequest, warnings: List[CannotInlineWarning]) extends InlineLog + + object InlineLog { + private def shouldLog(request: InlineRequest): Boolean = { + def logEnabled = compilerSettings.YoptLogInline.isSetByUser + def matchesName = { + val prefix = compilerSettings.YoptLogInline.value match { + case "_" => "" + case p => p + } + val name: String = request.callsite.callsiteClass.internalName + "." + request.callsite.callsiteMethod.name + name startsWith prefix + } + logEnabled && (upstream != null || (isTopLevel && matchesName)) + } + + // indexed by callsite method + private val logs = mutable.Map.empty[MethodNode, mutable.LinkedHashSet[InlineLog]] + + private var upstream: InlineLogSuccess = _ + private var isTopLevel = true + + def withInlineLogging[T](request: InlineRequest)(inlineRequest: => Unit)(inlinePost: => T): T = { + def doInlinePost(): T = { + val savedIsTopLevel = isTopLevel + isTopLevel = false + try inlinePost + finally isTopLevel = savedIsTopLevel + } + if (shouldLog(request)) { + val sizeBefore = request.callsite.callsiteMethod.instructions.size + inlineRequest + val log = InlineLogSuccess(request, sizeBefore, request.callsite.callee.get.callee.instructions.size) + apply(log) + + val savedUpstream = upstream + upstream = log + try doInlinePost() + finally upstream = savedUpstream + } else { + inlineRequest + doInlinePost() + } + } + + def apply(log: => InlineLog): Unit = if (shouldLog(log.request)) { + if (upstream != null) upstream.downstreamLog += log + else { + val methodLogs = logs.getOrElseUpdate(log.request.callsite.callsiteMethod, mutable.LinkedHashSet.empty) + methodLogs += log + } + } + + def entryString(log: InlineLog, indent: Int = 0): String = { + val callee = log.request.callsite.callee.get + val calleeString = callee.calleeDeclarationClass.internalName + "." + callee.callee.name + val indentString = " " * indent + log match { + case s @ InlineLogSuccess(_, sizeBefore, sizeInlined) => + val self = s"${indentString}inlined $calleeString. Before: $sizeBefore ins, inlined: $sizeInlined ins." + if (s.downstreamLog.isEmpty) self + else s.downstreamLog.iterator.map(entryString(_, indent + 2)).mkString(self + "\n", "\n", "") + + case InlineLogFail(_, w) => + s"${indentString}failed $calleeString. ${w.toString.replace('\n', ' ')}" + + case InlineLogRollback(_, _) => + s"${indentString}rolling back, nested inline failed." + } + } + + def print(): Unit = if (compilerSettings.YoptLogInline.isSetByUser) { + val byClassAndMethod: List[(InternalName, mutable.Map[MethodNode, mutable.LinkedHashSet[InlineLog]])] = { + logs. + groupBy(_._2.head.request.callsite.callsiteClass.internalName). + toList.sortBy(_._1) + } + for { + (c, methodLogs) <- byClassAndMethod + (m, mLogs) <- methodLogs.toList.sortBy(_._1.name) + mLog <- mLogs // insertion order + } { + println(s"Inline into $c.${m.name}: ${entryString(mLog)}") + } } } def runInliner(): Unit = { - rewriteFinalTraitMethodInvocations() - for (request <- collectAndOrderInlineRequests) { - val Right(callee) = request.callee // collectAndOrderInlineRequests returns callsites with a known callee - - // Inlining a method can create unreachable code. Example: - // def f = throw e - // def g = f; println() // println is unreachable after inlining f - // If we have an inline request for a call to g, and f has been already inlined into g, we - // need to run DCE before inlining g. - eliminateUnreachableCodeAndUpdateCallGraph(callee.callee, callee.calleeDeclarationClass.internalName) - - // DCE above removes unreachable callsites from the call graph. If the inlining request denotes - // such an eliminated callsite, do nothing. - if (callGraph.callsites contains request.callsiteInstruction) { - val r = inline(request.callsiteInstruction, request.callsiteStackHeight, request.callsiteMethod, request.callsiteClass, - callee.callee, callee.calleeDeclarationClass, - request.receiverKnownNotNull, keepLineNumbers = false) - - for (warning <- r) { - if ((callee.annotatedInline && btypes.compilerSettings.YoptWarningEmitAtInlineFailed) || warning.emitWarning(compilerSettings)) { - val annotWarn = if (callee.annotatedInline) " is annotated @inline but" else "" - val msg = s"${BackendReporting.methodSignature(callee.calleeDeclarationClass.internalName, callee.callee)}$annotWarn could not be inlined:\n$warning" - backendReporting.inlinerWarning(request.callsitePosition, msg) - } - } + val Right(callee) = request.callsite.callee // collectAndOrderInlineRequests returns callsites with a known callee + val warnings = inline(request) + for (warning <- warnings) { + if (warning.emitWarning(compilerSettings)) + backendReporting.inlinerWarning(request.callsite.callsitePosition, warning.toString) } } + InlineLog.print() } /** @@ -69,165 +135,21 @@ class Inliner[BT <: BTypes](val btypes: BT) { * - Always remove the same request when breaking inlining cycles * - Perform inlinings in a consistent order */ - object callsiteOrdering extends Ordering[Callsite] { - override def compare(x: Callsite, y: Callsite): Int = { - val cls = x.callsiteClass.internalName compareTo y.callsiteClass.internalName + object callsiteOrdering extends Ordering[InlineRequest] { + override def compare(x: InlineRequest, y: InlineRequest): Int = { + val xCs = x.callsite + val yCs = y.callsite + val cls = xCs.callsiteClass.internalName compareTo yCs.callsiteClass.internalName if (cls != 0) return cls - val name = x.callsiteMethod.name compareTo y.callsiteMethod.name + val name = xCs.callsiteMethod.name compareTo yCs.callsiteMethod.name if (name != 0) return name - val desc = x.callsiteMethod.desc compareTo y.callsiteMethod.desc + val desc = xCs.callsiteMethod.desc compareTo yCs.callsiteMethod.desc if (desc != 0) return desc def pos(c: Callsite) = c.callsiteMethod.instructions.indexOf(c.callsiteInstruction) - pos(x) - pos(y) - } - } - - /** - * Select callsites from the call graph that should be inlined. The resulting list of inlining - * requests is allowed to have cycles, and the callsites can appear in any order. - */ - def selectCallsitesForInlining: List[Callsite] = { - callsites.valuesIterator.filter({ - case callsite @ Callsite(_, _, _, Right(Callee(callee, calleeDeclClass, safeToInline, _, annotatedInline, _, warning)), _, _, _, pos) => - val res = doInlineCallsite(callsite) - - if (!res) { - if (annotatedInline && btypes.compilerSettings.YoptWarningEmitAtInlineFailed) { - // if the callsite is annotated @inline, we report an inline warning even if the underlying - // reason is, for example, mixed compilation (which has a separate -Yopt-warning flag). - def initMsg = s"${BackendReporting.methodSignature(calleeDeclClass.internalName, callee)} is annotated @inline but cannot be inlined" - def warnMsg = warning.map(" Possible reason:\n" + _).getOrElse("") - if (doRewriteTraitCallsite(callsite)) - backendReporting.inlinerWarning(pos, s"$initMsg: the trait method call could not be rewritten to the static implementation method." + warnMsg) - else if (!safeToInline) - backendReporting.inlinerWarning(pos, s"$initMsg: the method is not final and may be overridden." + warnMsg) - else - backendReporting.inlinerWarning(pos, s"$initMsg." + warnMsg) - } else if (warning.isDefined && warning.get.emitWarning(compilerSettings)) { - // when annotatedInline is false, and there is some warning, the callsite metadata is possibly incomplete. - backendReporting.inlinerWarning(pos, s"there was a problem determining if method ${callee.name} can be inlined: \n"+ warning.get) - } - } - - res - - case Callsite(ins, _, _, Left(warning), _, _, _, pos) => - if (warning.emitWarning(compilerSettings)) - backendReporting.inlinerWarning(pos, s"failed to determine if ${ins.name} should be inlined:\n$warning") - false - }).toList - } - - /** - * The current inlining heuristics are simple: inline calls to methods annotated @inline. - */ - def doInlineCallsite(callsite: Callsite): Boolean = callsite match { - case Callsite(_, _, _, Right(Callee(callee, calleeDeclClass, safeToInline, _, annotatedInline, _, warning)), _, _, _, pos) => - if (compilerSettings.YoptInlineHeuristics.value == "everything") safeToInline - else annotatedInline && safeToInline - - case _ => false - } - - def rewriteFinalTraitMethodInvocations(): Unit = { - // Rewriting final trait method callsites to the implementation class enables inlining. - // We cannot just iterate over the values of the `callsites` map because the rewrite changes the - // map. Therefore we first copy the values to a list. - callsites.values.toList.foreach(rewriteFinalTraitMethodInvocation) - } - - /** - * True for statically resolved trait callsites that should be rewritten to the static implementation method. - */ - def doRewriteTraitCallsite(callsite: Callsite) = callsite.callee match { - case Right(Callee(callee, calleeDeclarationClass, safeToInline, true, annotatedInline, annotatedNoInline, infoWarning)) => true - case _ => false - } - - /** - * Rewrite the INVOKEINTERFACE callsite of a final trait method invocation to INVOKESTATIC of the - * corresponding method in the implementation class. This enables inlining final trait methods. - * - * In a final trait method callsite, the callee is safeToInline and the callee method is abstract - * (the receiver type is the interface, so the method is abstract). - */ - def rewriteFinalTraitMethodInvocation(callsite: Callsite): Unit = { - if (doRewriteTraitCallsite(callsite)) { - val Right(Callee(callee, calleeDeclarationClass, _, _, annotatedInline, annotatedNoInline, infoWarning)) = callsite.callee - - val traitMethodArgumentTypes = asm.Type.getArgumentTypes(callee.desc) - - val implClassInternalName = calleeDeclarationClass.internalName + "$class" - - val selfParamTypeV: Either[OptimizerWarning, ClassBType] = calleeDeclarationClass.info.map(_.inlineInfo.traitImplClassSelfType match { - case Some(internalName) => classBTypeFromParsedClassfile(internalName) - case None => calleeDeclarationClass - }) - - def implClassMethodV(implMethodDescriptor: String): Either[OptimizerWarning, MethodNode] = { - byteCodeRepository.methodNode(implClassInternalName, callee.name, implMethodDescriptor).map(_._1) - } - - // The rewrite reading the implementation class and the implementation method from the bytecode - // repository. If either of the two fails, the rewrite is not performed. - val res = for { - selfParamType <- selfParamTypeV - implMethodDescriptor = asm.Type.getMethodDescriptor(asm.Type.getReturnType(callee.desc), selfParamType.toASMType +: traitMethodArgumentTypes: _*) - implClassMethod <- implClassMethodV(implMethodDescriptor) - implClassBType = classBTypeFromParsedClassfile(implClassInternalName) - selfTypeOk <- calleeDeclarationClass.isSubtypeOf(selfParamType) - } yield { - - // The self parameter type may be incompatible with the trait type. - // trait T { self: S => def foo = 1 } - // The $self parameter type of T$class.foo is S, which may be unrelated to T. If we re-write - // a call to T.foo to T$class.foo, we need to cast the receiver to S, otherwise we get a - // VerifyError. We run a `SourceInterpreter` to find all producer instructions of the - // receiver value and add a cast to the self type after each. - if (!selfTypeOk) { - // there's no need to run eliminateUnreachableCode here. building the call graph does that - // already, no code can become unreachable in the meantime. - val analyzer = new AsmAnalyzer(callsite.callsiteMethod, callsite.callsiteClass.internalName, new SourceInterpreter) - val receiverValue = analyzer.frameAt(callsite.callsiteInstruction).peekStack(traitMethodArgumentTypes.length) - for (i <- receiverValue.insns.asScala) { - val cast = new TypeInsnNode(CHECKCAST, selfParamType.internalName) - callsite.callsiteMethod.instructions.insert(i, cast) - } - } - - val newCallsiteInstruction = new MethodInsnNode(INVOKESTATIC, implClassInternalName, callee.name, implMethodDescriptor, false) - callsite.callsiteMethod.instructions.insert(callsite.callsiteInstruction, newCallsiteInstruction) - callsite.callsiteMethod.instructions.remove(callsite.callsiteInstruction) - - callGraph.callsites.remove(callsite.callsiteInstruction) - val staticCallsite = Callsite( - callsiteInstruction = newCallsiteInstruction, - callsiteMethod = callsite.callsiteMethod, - callsiteClass = callsite.callsiteClass, - callee = Right(Callee( - callee = implClassMethod, - calleeDeclarationClass = implClassBType, - safeToInline = true, - safeToRewrite = false, - annotatedInline = annotatedInline, - annotatedNoInline = annotatedNoInline, - calleeInfoWarning = infoWarning)), - argInfos = Nil, - callsiteStackHeight = callsite.callsiteStackHeight, - receiverKnownNotNull = callsite.receiverKnownNotNull, - callsitePosition = callsite.callsitePosition - ) - callGraph.callsites(newCallsiteInstruction) = staticCallsite - } - - for (warning <- res.left) { - val Right(callee) = callsite.callee - val newCallee = callee.copy(calleeInfoWarning = Some(RewriteTraitCallToStaticImplMethodFailed(calleeDeclarationClass.internalName, callee.callee.name, callee.callee.desc, warning))) - callGraph.callsites(callsite.callsiteInstruction) = callsite.copy(callee = Right(newCallee)) - } + pos(xCs) - pos(yCs) } } @@ -238,15 +160,13 @@ class Inliner[BT <: BTypes](val btypes: BT) { * The resulting list is sorted such that the leaves of the inline request graph are on the left. * Once these leaves are inlined, the successive elements will be leaves, etc. */ - private def collectAndOrderInlineRequests: List[Callsite] = { - val requests = selectCallsitesForInlining + private def collectAndOrderInlineRequests: List[InlineRequest] = { + val requestsByMethod = selectCallsitesForInlining withDefaultValue Set.empty + + val elided = mutable.Set.empty[InlineRequest] + def nonElidedRequests(methodNode: MethodNode): Set[InlineRequest] = requestsByMethod(methodNode) diff elided - // This map is an index to look up the inlining requests for a method. The value sets are mutable - // to allow removing elided requests (to break inlining cycles). The map itself is mutable to - // allow efficient building: requests.groupBy would build values as List[Callsite] that need to - // be transformed to mutable sets. - val inlineRequestsForMethod: mutable.Map[MethodNode, mutable.Set[Callsite]] = mutable.HashMap.empty.withDefaultValue(mutable.HashSet.empty) - for (r <- requests) inlineRequestsForMethod.getOrElseUpdate(r.callsiteMethod, mutable.HashSet.empty) += r + def allCallees(r: InlineRequest): Set[MethodNode] = r.post.flatMap(allCallees).toSet + r.callsite.callee.get.callee /** * Break cycles in the inline request graph by removing callsites. @@ -254,236 +174,454 @@ class Inliner[BT <: BTypes](val btypes: BT) { * The list `requests` is traversed left-to-right, removing those callsites that are part of a * cycle. Elided callsites are also removed from the `inlineRequestsForMethod` map. */ - def breakInlineCycles(requests: List[Callsite]): List[Callsite] = { + def breakInlineCycles: List[InlineRequest] = { // is there a path of inline requests from start to goal? - def isReachable(start: MethodNode, goal: MethodNode): Boolean = { - @tailrec def reachableImpl(check: List[MethodNode], visited: Set[MethodNode]): Boolean = check match { - case x :: xs => + def isReachable(start: Set[MethodNode], goal: MethodNode): Boolean = { + @tailrec def reachableImpl(check: Set[MethodNode], visited: Set[MethodNode]): Boolean = { + if (check.isEmpty) false + else { + val x = check.head if (x == goal) true - else if (visited(x)) reachableImpl(xs, visited) + else if (visited(x)) reachableImpl(check - x, visited) else { - val callees = inlineRequestsForMethod(x).map(_.callee.get.callee) - reachableImpl(xs ::: callees.toList, visited + x) + val callees = nonElidedRequests(x).flatMap(allCallees) + reachableImpl(check - x ++ callees, visited + x) } - - case Nil => - false + } } - reachableImpl(List(start), Set.empty) + reachableImpl(start, Set.empty) } - val result = new mutable.ListBuffer[Callsite]() + val result = new mutable.ListBuffer[InlineRequest]() + val requests = requestsByMethod.valuesIterator.flatten.toArray // sort the inline requests to ensure that removing requests is deterministic - for (r <- requests.sorted(callsiteOrdering)) { + java.util.Arrays.sort(requests, callsiteOrdering) + for (r <- requests) { // is there a chain of inlining requests that would inline the callsite method into the callee? - if (isReachable(r.callee.get.callee, r.callsiteMethod)) - inlineRequestsForMethod(r.callsiteMethod) -= r + if (isReachable(allCallees(r), r.callsite.callsiteMethod)) + elided += r else result += r + () } result.toList } // sort the remaining inline requests such that the leaves appear first, then those requests // that become leaves, etc. - def leavesFirst(requests: List[Callsite], visited: Set[Callsite] = Set.empty): List[Callsite] = { + def leavesFirst(requests: List[InlineRequest], visited: Set[InlineRequest] = Set.empty): List[InlineRequest] = { if (requests.isEmpty) Nil else { val (leaves, others) = requests.partition(r => { - val inlineRequestsForCallee = inlineRequestsForMethod(r.callee.get.callee) - inlineRequestsForCallee.forall(visited) + val inlineRequestsForCallees = allCallees(r).flatMap(nonElidedRequests) + inlineRequestsForCallees.forall(visited) }) assert(leaves.nonEmpty, requests) leaves ::: leavesFirst(others, visited ++ leaves) } } - leavesFirst(breakInlineCycles(requests)) + leavesFirst(breakInlineCycles) } - /** - * Copy and adapt the instructions of a method to a callsite. + * Given an InlineRequest(mainCallsite, post = List(postCallsite)), the postCallsite is a callsite + * in the method `mainCallsite.callee`. Once the mainCallsite is inlined into the target method + * (mainCallsite.callsiteMethod), we need to find the cloned callsite that corresponds to the + * postCallsite so we can inline that into the target method as well. * - * Preconditions: - * - The maxLocals and maxStack values of the callsite method are correctly computed - * - The callsite method contains no unreachable basic blocks, i.e., running an [[Analyzer]] - * does not produce any `null` frames + * However, it is possible that there is no cloned callsite at all that corresponds to the + * postCallsite, for example if the corresponding callsite already inlined. Example: + * + * def a() = 1 + * def b() = a() + 2 + * def c() = b() + 3 + * def d() = c() + 4 + * + * We have the following callsite objects in the call graph: + * + * c1 = a() in b + * c2 = b() in c + * c3 = c() in d * - * @param callsiteInstruction The invocation instruction - * @param callsiteStackHeight The stack height at the callsite - * @param callsiteMethod The method in which the invocation occurs - * @param callsiteClass The class in which the callsite method is defined - * @param callee The invoked method - * @param calleeDeclarationClass The class in which the invoked method is defined - * @param receiverKnownNotNull `true` if the receiver is known to be non-null - * @param keepLineNumbers `true` if LineNumberNodes should be copied to the call site - * @return `Some(message)` if inlining cannot be performed, `None` otherwise + * Assume we have the following inline request + * r = InlineRequest(c3, + * post = List(InlineRequest(c2, + * post = List(InlineRequest(c1, post = Nil))))) + * + * But before inlining r, assume a separate InlineRequest(c2, post = Nil) is inlined first. We get + * + * c1' = a() in c // added to the call graph + * c1.inlinedClones += (c1' at c2) // remember that c1' was created when inlining c2 + * ~c2~ // c2 is removed from the call graph + * + * If we now inline r, we first inline c3. We get + * + * c1'' = a() in d // added to call graph + * c1'.inlinedClones += (c1'' at c3) // remember that c1'' was created when inlining c3 + * ~c3~ + * + * Now we continue with the post-requests for r, i.e. c2. + * - we try to find the clone of c2 that was created when inlining c3 - but there is none. c2 + * was already inlined before + * - we continue with the post-request of c2: c1 + * - we search for the callsite of c1 that was cloned when inlining c2, we find c1' + * - recursively we search for the callsite of c1' that was cloned when inlining c3, we find c1'' + * - so we create an inline request for c1'' */ - def inline(callsiteInstruction: MethodInsnNode, callsiteStackHeight: Int, callsiteMethod: MethodNode, callsiteClass: ClassBType, - callee: MethodNode, calleeDeclarationClass: ClassBType, - receiverKnownNotNull: Boolean, keepLineNumbers: Boolean): Option[CannotInlineWarning] = { - canInline(callsiteInstruction, callsiteStackHeight, callsiteMethod, callsiteClass, callee, calleeDeclarationClass) orElse { - // New labels for the cloned instructions - val labelsMap = cloneLabels(callee) - val (clonedInstructions, instructionMap) = cloneInstructions(callee, labelsMap) - if (!keepLineNumbers) { - removeLineNumberNodes(clonedInstructions) + def adaptPostRequestForMainCallsite(post: InlineRequest, mainCallsite: Callsite): List[InlineRequest] = { + def impl(post: InlineRequest, at: Callsite): List[InlineRequest] = { + post.callsite.inlinedClones.find(_.clonedWhenInlining == at) match { + case Some(clonedCallsite) => + List(InlineRequest(clonedCallsite.callsite, post.post, post.reason)) + case None => + post.post.flatMap(impl(_, post.callsite)).flatMap(impl(_, at)) } + } + impl(post, mainCallsite) + } + + class UndoLog(active: Boolean = true) { + import java.util.{ ArrayList => JArrayList } + + private var actions = List.empty[() => Unit] + private var methodStateSaved = false + + def apply(a: => Unit): Unit = if (active) actions = (() => a) :: actions + def rollback(): Unit = if (active) actions.foreach(_.apply()) - // local vars in the callee are shifted by the number of locals at the callsite - val localVarShift = callsiteMethod.maxLocals - clonedInstructions.iterator.asScala foreach { - case varInstruction: VarInsnNode => varInstruction.`var` += localVarShift - case iinc: IincInsnNode => iinc.`var` += localVarShift - case _ => () + def saveMethodState(methodNode: MethodNode): Unit = if (active && !methodStateSaved) { + methodStateSaved = true + val currentInstructions = methodNode.instructions.toArray + val currentLocalVariables = new JArrayList(methodNode.localVariables) + val currentTryCatchBlocks = new JArrayList(methodNode.tryCatchBlocks) + val currentMaxLocals = methodNode.maxLocals + val currentMaxStack = methodNode.maxStack + + apply { + // `methodNode.instructions.clear()` doesn't work: it keeps the `prev` / `next` / `index` of + // instruction nodes. `instructions.removeAll(true)` would work, but is not public. + methodNode.instructions.iterator.asScala.toList.foreach(methodNode.instructions.remove) + for (i <- currentInstructions) methodNode.instructions.add(i) + + methodNode.localVariables.clear() + methodNode.localVariables.addAll(currentLocalVariables) + + methodNode.tryCatchBlocks.clear() + methodNode.tryCatchBlocks.addAll(currentTryCatchBlocks) + + methodNode.maxLocals = currentMaxLocals + methodNode.maxStack = currentMaxStack } + } + } - // add a STORE instruction for each expected argument, including for THIS instance if any - val argStores = new InsnList - var nextLocalIndex = callsiteMethod.maxLocals - if (!isStaticMethod(callee)) { - if (!receiverKnownNotNull) { - argStores.add(new InsnNode(DUP)) - val nonNullLabel = newLabelNode - argStores.add(new JumpInsnNode(IFNONNULL, nonNullLabel)) - argStores.add(new InsnNode(ACONST_NULL)) - argStores.add(new InsnNode(ATHROW)) - argStores.add(nonNullLabel) + val NoUndoLogging = new UndoLog(active = false) + + /** + * Inline the callsite of an inlining request and its post-inlining requests. + * + * @return An inliner warning for each callsite that could not be inlined. + */ + def inline(request: InlineRequest, undo: UndoLog = NoUndoLogging): List[CannotInlineWarning] = { + def doInline(undo: UndoLog, callRollback: Boolean = false): List[CannotInlineWarning] = { + InlineLog.withInlineLogging(request) { + inlineCallsite(request.callsite, undo) + } { + val postRequests = request.post.flatMap(adaptPostRequestForMainCallsite(_, request.callsite)) + val warnings = postRequests.flatMap(inline(_, undo)) + if (callRollback && warnings.nonEmpty) { + undo.rollback() + InlineLog(InlineLogRollback(request, warnings)) } - argStores.add(new VarInsnNode(ASTORE, nextLocalIndex)) - nextLocalIndex += 1 + warnings } + } - // We just use an asm.Type here, no need to create the MethodBType. - val calleAsmType = asm.Type.getMethodType(callee.desc) + def inlinedByPost(insns: List[AbstractInsnNode]): Boolean = + insns.nonEmpty && insns.forall(ins => request.post.exists(_.callsite.callsiteInstruction == ins)) - for(argTp <- calleAsmType.getArgumentTypes) { - val opc = argTp.getOpcode(ISTORE) // returns the correct xSTORE instruction for argTp - argStores.insert(new VarInsnNode(opc, nextLocalIndex)) // "insert" is "prepend" - the last argument is on the top of the stack - nextLocalIndex += argTp.getSize + canInlineCallsite(request.callsite) match { + case None => + doInline(undo) + + case Some((_, illegalAccessInsns)) if inlinedByPost(illegalAccessInsns) => + // speculatively inline, roll back if an illegalAccessInsn cannot be eliminated + if (undo == NoUndoLogging) doInline(new UndoLog(), callRollback = true) + else doInline(undo) + + case Some((w, _)) => + InlineLog(InlineLogFail(request, w)) + List(w) + } + } + + /** + * Copy and adapt the instructions of a method to a callsite. + * + * Preconditions: + * - The callsite can safely be inlined (canInlineBody is true) + * - The maxLocals and maxStack values of the callsite method are correctly computed + * + * @return A map associating instruction nodes of the callee with the corresponding cloned + * instruction in the callsite method. + */ + def inlineCallsite(callsite: Callsite, undo: UndoLog = NoUndoLogging): Unit = { + import callsite.{callsiteClass, callsiteMethod, callsiteInstruction, receiverKnownNotNull, callsiteStackHeight} + val Right(callsiteCallee) = callsite.callee + import callsiteCallee.{callee, calleeDeclarationClass, sourceFilePath} + + // Inlining requires the callee not to have unreachable code, the analyzer used below should not + // return any `null` frames. Note that inlining a method can create unreachable code. Example: + // def f = throw e + // def g = f; println() // println is unreachable after inlining f + // If we have an inline request for a call to g, and f has been already inlined into g, we + // need to run DCE on g's body before inlining g. + localOpt.minimalRemoveUnreachableCode(callee, calleeDeclarationClass.internalName) + + // If the callsite was eliminated by DCE, do nothing. + if (!callGraph.containsCallsite(callsite)) return + + // New labels for the cloned instructions + val labelsMap = cloneLabels(callee) + val sameSourceFile = sourceFilePath match { + case Some(calleeSource) => byteCodeRepository.compilingClasses.get(callsiteClass.internalName) match { + case Some((_, `calleeSource`)) => true + case _ => false } + case _ => false + } + val (clonedInstructions, instructionMap, targetHandles) = cloneInstructions(callee, labelsMap, keepLineNumbers = sameSourceFile) + + // local vars in the callee are shifted by the number of locals at the callsite + val localVarShift = callsiteMethod.maxLocals + clonedInstructions.iterator.asScala foreach { + case varInstruction: VarInsnNode => varInstruction.`var` += localVarShift + case iinc: IincInsnNode => iinc.`var` += localVarShift + case _ => () + } - clonedInstructions.insert(argStores) - - // label for the exit of the inlined functions. xRETURNs are replaced by GOTOs to this label. - val postCallLabel = newLabelNode - clonedInstructions.add(postCallLabel) - - // replace xRETURNs: - // - store the return value (if any) - // - clear the stack of the inlined method (insert DROPs) - // - load the return value - // - GOTO postCallLabel - - val returnType = calleAsmType.getReturnType - val hasReturnValue = returnType.getSort != asm.Type.VOID - val returnValueIndex = callsiteMethod.maxLocals + callee.maxLocals - nextLocalIndex += returnType.getSize - - def returnValueStore(returnInstruction: AbstractInsnNode) = { - val opc = returnInstruction.getOpcode match { - case IRETURN => ISTORE - case LRETURN => LSTORE - case FRETURN => FSTORE - case DRETURN => DSTORE - case ARETURN => ASTORE - } - new VarInsnNode(opc, returnValueIndex) + // add a STORE instruction for each expected argument, including for THIS instance if any + val argStores = new InsnList + var nextLocalIndex = callsiteMethod.maxLocals + if (!isStaticMethod(callee)) { + if (!receiverKnownNotNull) { + argStores.add(new InsnNode(DUP)) + val nonNullLabel = newLabelNode + argStores.add(new JumpInsnNode(IFNONNULL, nonNullLabel)) + argStores.add(new InsnNode(ACONST_NULL)) + argStores.add(new InsnNode(ATHROW)) + argStores.add(nonNullLabel) } + argStores.add(new VarInsnNode(ASTORE, nextLocalIndex)) + nextLocalIndex += 1 + } - // We run an interpreter to know the stack height at each xRETURN instruction and the sizes - // of the values on the stack. - val analyzer = new AsmAnalyzer(callee, calleeDeclarationClass.internalName) + // We just use an asm.Type here, no need to create the MethodBType. + val calleAsmType = asm.Type.getMethodType(callee.desc) + val calleeParamTypes = calleAsmType.getArgumentTypes - for (originalReturn <- callee.instructions.iterator().asScala if isReturn(originalReturn)) { - val frame = analyzer.frameAt(originalReturn) - var stackHeight = frame.getStackSize + for(argTp <- calleeParamTypes) { + val opc = argTp.getOpcode(ISTORE) // returns the correct xSTORE instruction for argTp + argStores.insert(new VarInsnNode(opc, nextLocalIndex)) // "insert" is "prepend" - the last argument is on the top of the stack + nextLocalIndex += argTp.getSize + } - val inlinedReturn = instructionMap(originalReturn) - val returnReplacement = new InsnList + clonedInstructions.insert(argStores) + + // label for the exit of the inlined functions. xRETURNs are replaced by GOTOs to this label. + val postCallLabel = newLabelNode + clonedInstructions.add(postCallLabel) + + // replace xRETURNs: + // - store the return value (if any) + // - clear the stack of the inlined method (insert DROPs) + // - load the return value + // - GOTO postCallLabel + + val returnType = calleAsmType.getReturnType + val hasReturnValue = returnType.getSort != asm.Type.VOID + val returnValueIndex = callsiteMethod.maxLocals + callee.maxLocals + nextLocalIndex += returnType.getSize + + def returnValueStore(returnInstruction: AbstractInsnNode) = { + val opc = returnInstruction.getOpcode match { + case IRETURN => ISTORE + case LRETURN => LSTORE + case FRETURN => FSTORE + case DRETURN => DSTORE + case ARETURN => ASTORE + } + new VarInsnNode(opc, returnValueIndex) + } - def drop(slot: Int) = returnReplacement add getPop(frame.peekStack(slot).getSize) + // We run an interpreter to know the stack height at each xRETURN instruction and the sizes + // of the values on the stack. + // We don't need to worry about the method being too large for running an analysis. Callsites of + // large methods are not added to the call graph. + val analyzer = new AsmAnalyzer(callee, calleeDeclarationClass.internalName) - // for non-void methods, store the stack top into the return local variable - if (hasReturnValue) { - returnReplacement add returnValueStore(originalReturn) - stackHeight -= 1 - } + for (originalReturn <- callee.instructions.iterator().asScala if isReturn(originalReturn)) { + val frame = analyzer.frameAt(originalReturn) + var stackHeight = frame.getStackSize - // drop the rest of the stack - for (i <- 0 until stackHeight) drop(i) + val inlinedReturn = instructionMap(originalReturn) + val returnReplacement = new InsnList - returnReplacement add new JumpInsnNode(GOTO, postCallLabel) - clonedInstructions.insert(inlinedReturn, returnReplacement) - clonedInstructions.remove(inlinedReturn) - } + def drop(slot: Int) = returnReplacement add getPop(frame.peekStack(slot).getSize) - // Load instruction for the return value + // for non-void methods, store the stack top into the return local variable if (hasReturnValue) { - val retVarLoad = { - val opc = returnType.getOpcode(ILOAD) - new VarInsnNode(opc, returnValueIndex) - } - clonedInstructions.insert(postCallLabel, retVarLoad) + returnReplacement add returnValueStore(originalReturn) + stackHeight -= 1 } - callsiteMethod.instructions.insert(callsiteInstruction, clonedInstructions) - callsiteMethod.instructions.remove(callsiteInstruction) - - callsiteMethod.localVariables.addAll(cloneLocalVariableNodes(callee, labelsMap, callee.name + "_").asJava) - callsiteMethod.tryCatchBlocks.addAll(cloneTryCatchBlockNodes(callee, labelsMap).asJava) - - // Add all invocation instructions and closure instantiations that were inlined to the call graph - callee.instructions.iterator().asScala foreach { - case originalCallsiteIns: MethodInsnNode => - callGraph.callsites.get(originalCallsiteIns) match { - case Some(originalCallsite) => - val newCallsiteIns = instructionMap(originalCallsiteIns).asInstanceOf[MethodInsnNode] - callGraph.callsites(newCallsiteIns) = Callsite( - callsiteInstruction = newCallsiteIns, - callsiteMethod = callsiteMethod, - callsiteClass = callsiteClass, - callee = originalCallsite.callee, - argInfos = Nil, // TODO: re-compute argInfos for new destination (once we actually compute them) - callsiteStackHeight = callsiteStackHeight + originalCallsite.callsiteStackHeight, - receiverKnownNotNull = originalCallsite.receiverKnownNotNull, - callsitePosition = originalCallsite.callsitePosition - ) - - case None => - } + // drop the rest of the stack + for (i <- 0 until stackHeight) drop(i) - case indy: InvokeDynamicInsnNode => - callGraph.closureInstantiations.get(indy) match { - case Some(closureInit) => - val newIndy = instructionMap(indy).asInstanceOf[InvokeDynamicInsnNode] - callGraph.closureInstantiations(newIndy) = ClosureInstantiation(closureInit.lambdaMetaFactoryCall.copy(indy = newIndy), callsiteMethod, callsiteClass) - - case None => - } + returnReplacement add new JumpInsnNode(GOTO, postCallLabel) + clonedInstructions.insert(inlinedReturn, returnReplacement) + clonedInstructions.remove(inlinedReturn) + } - case _ => + // Load instruction for the return value + if (hasReturnValue) { + val retVarLoad = { + val opc = returnType.getOpcode(ILOAD) + new VarInsnNode(opc, returnValueIndex) } - // Remove the elided invocation from the call graph - callGraph.callsites.remove(callsiteInstruction) + clonedInstructions.insert(postCallLabel, retVarLoad) + } - // Inlining a method body can render some code unreachable, see example above (in runInliner). - unreachableCodeEliminated -= callsiteMethod + undo.saveMethodState(callsiteMethod) - callsiteMethod.maxLocals += returnType.getSize + callee.maxLocals - callsiteMethod.maxStack = math.max(callsiteMethod.maxStack, callee.maxStack + callsiteStackHeight) + callsiteMethod.instructions.insert(callsiteInstruction, clonedInstructions) + callsiteMethod.instructions.remove(callsiteInstruction) - None + callsiteMethod.localVariables.addAll(cloneLocalVariableNodes(callee, labelsMap, callee.name, localVarShift).asJava) + // prepend the handlers of the callee. the order of handlers matters: when an exception is thrown + // at some instruction, the first handler guarding that instruction and having a matching exception + // type is executed. prepending the callee's handlers makes sure to test those handlers first if + // an exception is thrown in the inlined code. + callsiteMethod.tryCatchBlocks.addAll(0, cloneTryCatchBlockNodes(callee, labelsMap).asJava) + + callsiteMethod.maxLocals += returnType.getSize + callee.maxLocals + val maxStackOfInlinedCode = { + // One slot per value is correct for long / double, see comment in the `analysis` package object. + val numStoredArgs = calleeParamTypes.length + (if (isStaticMethod(callee)) 0 else 1) + callee.maxStack + callsiteStackHeight - numStoredArgs + } + val stackHeightAtNullCheck = { + // When adding a null check for the receiver, a DUP is inserted, which might cause a new maxStack. + // If the callsite has other argument values than the receiver on the stack, these are pop'ed + // and stored into locals before the null check, so in that case the maxStack doesn't grow. + val stackSlotForNullCheck = if (!isStaticMethod(callee) && !receiverKnownNotNull && calleeParamTypes.isEmpty) 1 else 0 + callsiteStackHeight + stackSlotForNullCheck } + + callsiteMethod.maxStack = math.max(callsiteMethod.maxStack, math.max(stackHeightAtNullCheck, maxStackOfInlinedCode)) + + val added = addIndyLambdaImplMethod(callsiteClass.internalName, targetHandles) + undo { removeIndyLambdaImplMethod(callsiteClass.internalName, added) } + + callGraph.addIfMissing(callee, calleeDeclarationClass) + + def mapArgInfo(argInfo: (Int, ArgInfo)): Option[(Int, ArgInfo)] = argInfo match { + case lit @ (_, FunctionLiteral) => Some(lit) + case (argIndex, ForwardedParam(paramIndex)) => callsite.argInfos.get(paramIndex).map((argIndex, _)) + } + + // Add all invocation instructions and closure instantiations that were inlined to the call graph + callGraph.callsites(callee).valuesIterator foreach { originalCallsite => + val newCallsiteIns = instructionMap(originalCallsite.callsiteInstruction).asInstanceOf[MethodInsnNode] + val argInfos = originalCallsite.argInfos flatMap mapArgInfo + val newCallsite = originalCallsite.copy( + callsiteInstruction = newCallsiteIns, + callsiteMethod = callsiteMethod, + callsiteClass = callsiteClass, + argInfos = argInfos, + callsiteStackHeight = callsiteStackHeight + originalCallsite.callsiteStackHeight + ) + val clonedCallsite = ClonedCallsite(newCallsite, callsite) + originalCallsite.inlinedClones += clonedCallsite + callGraph.addCallsite(newCallsite) + undo { + originalCallsite.inlinedClones -= clonedCallsite + callGraph.removeCallsite(newCallsite.callsiteInstruction, newCallsite.callsiteMethod) + } + } + + callGraph.closureInstantiations(callee).valuesIterator foreach { originalClosureInit => + val newIndy = instructionMap(originalClosureInit.lambdaMetaFactoryCall.indy).asInstanceOf[InvokeDynamicInsnNode] + val capturedArgInfos = originalClosureInit.capturedArgInfos flatMap mapArgInfo + val newClosureInit = ClosureInstantiation( + originalClosureInit.lambdaMetaFactoryCall.copy(indy = newIndy), + callsiteMethod, + callsiteClass, + capturedArgInfos) + originalClosureInit.inlinedClones += newClosureInit + callGraph.addClosureInstantiation(newClosureInit) + undo { + callGraph.removeClosureInstantiation(newClosureInit.lambdaMetaFactoryCall.indy, newClosureInit.ownerMethod) + } + } + + // Remove the elided invocation from the call graph + callGraph.removeCallsite(callsiteInstruction, callsiteMethod) + undo { callGraph.addCallsite(callsite) } + + // Inlining a method body can render some code unreachable, see example above in this method. + unreachableCodeEliminated -= callsiteMethod } /** - * Check whether an inling can be performed. Parmeters are described in method [[inline]]. + * Check whether an inlining can be performed. This method performs tests that don't change even + * if the body of the callee is changed by the inliner / optimizer, so it can be used early + * (when looking at the call graph and collecting inline requests for the program). + * + * The tests that inspect the callee's instructions are implemented in method `canInlineBody`, + * which is queried when performing an inline. + * * @return `Some(message)` if inlining cannot be performed, `None` otherwise */ - def canInline(callsiteInstruction: MethodInsnNode, callsiteStackHeight: Int, callsiteMethod: MethodNode, callsiteClass: ClassBType, - callee: MethodNode, calleeDeclarationClass: ClassBType): Option[CannotInlineWarning] = { + def earlyCanInlineCheck(callsite: Callsite): Option[CannotInlineWarning] = { + import callsite.{callsiteMethod, callsiteClass} + val Right(callsiteCallee) = callsite.callee + import callsiteCallee.{callee, calleeDeclarationClass} + + if (isSynchronizedMethod(callee)) { + // Could be done by locking on the receiver, wrapping the inlined code in a try and unlocking + // in finally. But it's probably not worth the effort, scala never emits synchronized methods. + Some(SynchronizedMethod(calleeDeclarationClass.internalName, callee.name, callee.desc, callsite.isInlineAnnotated)) + } else if (isStrictfpMethod(callsiteMethod) != isStrictfpMethod(callee)) { + Some(StrictfpMismatch( + calleeDeclarationClass.internalName, callee.name, callee.desc, callsite.isInlineAnnotated, + callsiteClass.internalName, callsiteMethod.name, callsiteMethod.desc)) + } else + None + } + + /** + * Check whether the body of the callee contains any instructions that prevent the callsite from + * being inlined. See also method `earlyCanInlineCheck`. + * + * The result of this check depends on changes to the callee method's body. For example, if the + * callee initially invokes a private method, it cannot be inlined into a different class. If the + * private method is inlined into the callee, inlining the callee becomes possible. Therefore + * we don't query it while traversing the call graph and selecting callsites to inline - it might + * rule out callsites that can be inlined just fine. + * + * Returns + * - `None` if the callsite can be inlined + * - `Some((message, Nil))` if there was an issue performing the access checks, for example + * because of a missing classfile + * - `Some((message, instructions))` if inlining `instructions` into the callsite method would + * cause an IllegalAccessError + */ + def canInlineCallsite(callsite: Callsite): Option[(CannotInlineWarning, List[AbstractInsnNode])] = { + import callsite.{callsiteInstruction, callsiteMethod, callsiteClass, callsiteStackHeight} + val Right(callsiteCallee) = callsite.callee + import callsiteCallee.{callee, calleeDeclarationClass} def calleeDesc = s"${callee.name} of type ${callee.desc} in ${calleeDeclarationClass.internalName}" def methodMismatch = s"Wrong method node for inlining ${textify(callsiteInstruction)}: $calleeDesc" @@ -511,31 +649,30 @@ class Inliner[BT <: BTypes](val btypes: BT) { } if (codeSizeOKForInlining(callsiteMethod, callee)) { - Some(ResultingMethodTooLarge( - calleeDeclarationClass.internalName, callee.name, callee.desc, - callsiteClass.internalName, callsiteMethod.name, callsiteMethod.desc)) - } else if (isSynchronizedMethod(callee)) { - // Could be done by locking on the receiver, wrapping the inlined code in a try and unlocking - // in finally. But it's probably not worth the effort, scala never emits synchronized methods. - Some(SynchronizedMethod(calleeDeclarationClass.internalName, callee.name, callee.desc)) - } else if (isStrictfpMethod(callsiteMethod) != isStrictfpMethod(callee)) { - Some(StrictfpMismatch( - calleeDeclarationClass.internalName, callee.name, callee.desc, - callsiteClass.internalName, callsiteMethod.name, callsiteMethod.desc)) + val warning = ResultingMethodTooLarge( + calleeDeclarationClass.internalName, callee.name, callee.desc, callsite.isInlineAnnotated, + callsiteClass.internalName, callsiteMethod.name, callsiteMethod.desc) + Some((warning, Nil)) } else if (!callee.tryCatchBlocks.isEmpty && stackHasNonParameters) { - Some(MethodWithHandlerCalledOnNonEmptyStack( - calleeDeclarationClass.internalName, callee.name, callee.desc, - callsiteClass.internalName, callsiteMethod.name, callsiteMethod.desc)) - } else findIllegalAccess(callee.instructions, calleeDeclarationClass, callsiteClass) map { - case (illegalAccessIns, None) => - IllegalAccessInstruction( - calleeDeclarationClass.internalName, callee.name, callee.desc, - callsiteClass.internalName, illegalAccessIns) - - case (illegalAccessIns, Some(warning)) => - IllegalAccessCheckFailed( - calleeDeclarationClass.internalName, callee.name, callee.desc, - callsiteClass.internalName, illegalAccessIns, warning) + val warning = MethodWithHandlerCalledOnNonEmptyStack( + calleeDeclarationClass.internalName, callee.name, callee.desc, callsite.isInlineAnnotated, + callsiteClass.internalName, callsiteMethod.name, callsiteMethod.desc) + Some((warning, Nil)) + } else findIllegalAccess(callee.instructions, calleeDeclarationClass, callsiteClass) match { + case Right(Nil) => + None + + case Right(illegalAccessInsns) => + val warning = IllegalAccessInstruction( + calleeDeclarationClass.internalName, callee.name, callee.desc, callsite.isInlineAnnotated, + callsiteClass.internalName, illegalAccessInsns.head) + Some((warning, illegalAccessInsns)) + + case Left((illegalAccessIns, cause)) => + val warning = IllegalAccessCheckFailed( + calleeDeclarationClass.internalName, callee.name, callee.desc, callsite.isInlineAnnotated, + callsiteClass.internalName, illegalAccessIns, cause) + Some((warning, Nil)) } } @@ -545,7 +682,7 @@ class Inliner[BT <: BTypes](val btypes: BT) { * (A2) C and D are members of the same run-time package */ def classIsAccessible(accessed: BType, from: ClassBType): Either[OptimizerWarning, Boolean] = (accessed: @unchecked) match { - // TODO: A2 requires "same run-time package", which seems to be package + classloader (JMVS 5.3.). is the below ok? + // TODO: A2 requires "same run-time package", which seems to be package + classloader (JVMS 5.3.). is the below ok? case c: ClassBType => c.isPublic.map(_ || c.packageInternalName == from.packageInternalName) case a: ArrayBType => classIsAccessible(a.elementType, from) case _: PrimitiveBType => Right(true) @@ -587,7 +724,7 @@ class Inliner[BT <: BTypes](val btypes: BT) { * type from there (https://github.com/scala-opt/scala/issues/13). */ def memberIsAccessible(memberFlags: Int, memberDeclClass: ClassBType, memberRefClass: ClassBType, from: ClassBType): Either[OptimizerWarning, Boolean] = { - // TODO: B3 requires "same run-time package", which seems to be package + classloader (JMVS 5.3.). is the below ok? + // TODO: B3 requires "same run-time package", which seems to be package + classloader (JVMS 5.3.). is the below ok? def samePackageAsDestination = memberDeclClass.packageInternalName == from.packageInternalName def targetObjectConformsToDestinationClass = false // needs type propagation analysis, see above @@ -624,13 +761,14 @@ class Inliner[BT <: BTypes](val btypes: BT) { } /** - * Returns the first instruction in the `instructions` list that would cause a - * [[java.lang.IllegalAccessError]] when inlined into the `destinationClass`. - * - * If validity of some instruction could not be checked because an error occurred, the instruction - * is returned together with a warning message that describes the problem. + * Returns + * - `Right(Nil)` if all instructions can be safely inlined + * - `Right(insns)` if inlining any of `insns` would cause a [[java.lang.IllegalAccessError]] + * when inlined into the `destinationClass` + * - `Left((insn, warning))` if validity of some instruction could not be checked because an + * error occurred */ - def findIllegalAccess(instructions: InsnList, calleeDeclarationClass: ClassBType, destinationClass: ClassBType): Option[(AbstractInsnNode, Option[OptimizerWarning])] = { + def findIllegalAccess(instructions: InsnList, calleeDeclarationClass: ClassBType, destinationClass: ClassBType): Either[(AbstractInsnNode, OptimizerWarning), List[AbstractInsnNode]] = { /** * Check if `instruction` can be transplanted to `destinationClass`. * @@ -759,17 +897,15 @@ class Inliner[BT <: BTypes](val btypes: BT) { } val it = instructions.iterator.asScala - @tailrec def find: Option[(AbstractInsnNode, Option[OptimizerWarning])] = { - if (!it.hasNext) None // all instructions are legal - else { - val i = it.next() - isLegal(i) match { - case Left(warning) => Some((i, Some(warning))) // checking isLegal for i failed - case Right(false) => Some((i, None)) // an illegal instruction was found - case _ => find - } + val illegalAccess = mutable.ListBuffer.empty[AbstractInsnNode] + while (it.hasNext) { + val i = it.next() + isLegal(i) match { + case Left(warning) => return Left((i, warning)) // checking isLegal for i failed + case Right(false) => illegalAccess += i // an illegal instruction was found + case _ => } } - find + Right(illegalAccess.toList) } } diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala new file mode 100644 index 0000000000..63360e17ff --- /dev/null +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala @@ -0,0 +1,339 @@ +/* NSC -- new Scala compiler + * Copyright 2005-2014 LAMP/EPFL + * @author Martin Odersky + */ + +package scala.tools.nsc +package backend.jvm +package opt + +import scala.annotation.tailrec +import scala.collection.JavaConverters._ +import scala.tools.asm.Opcodes +import scala.tools.asm.tree.{AbstractInsnNode, MethodInsnNode, MethodNode} +import scala.tools.nsc.backend.jvm.BTypes.InternalName +import scala.tools.nsc.backend.jvm.BackendReporting.{CalleeNotFinal, OptimizerWarning} + +class InlinerHeuristics[BT <: BTypes](val bTypes: BT) { + import bTypes._ + import callGraph._ + + final case class InlineRequest(callsite: Callsite, post: List[InlineRequest], reason: String) { + // invariant: all post inline requests denote callsites in the callee of the main callsite + for (pr <- post) assert(pr.callsite.callsiteMethod == callsite.callee.get.callee, s"Callsite method mismatch: main $callsite - post ${pr.callsite}") + } + + def canInlineFromSource(sourceFilePath: Option[String]) = compilerSettings.optInlineGlobal || sourceFilePath.isDefined + + /** + * Select callsites from the call graph that should be inlined, grouped by the containing method. + * Cyclic inlining requests are allowed, the inliner will eliminate requests to break cycles. + */ + def selectCallsitesForInlining: Map[MethodNode, Set[InlineRequest]] = { + // We should only create inlining requests for callsites being compiled (not for callsites in + // classes on the classpath). The call graph may contain callsites of classes parsed from the + // classpath. In order to get only the callsites being compiled, we start at the map of + // compilingClasses in the byteCodeRepository. + val compilingMethods = for { + (classNode, _) <- byteCodeRepository.compilingClasses.valuesIterator + methodNode <- classNode.methods.iterator.asScala + } yield methodNode + + compilingMethods.map(methodNode => { + var requests = Set.empty[InlineRequest] + callGraph.callsites(methodNode).valuesIterator foreach { + case callsite @ Callsite(_, _, _, Right(Callee(callee, _, _, _, _, _, _, callsiteWarning)), _, _, _, pos, _, _) => + inlineRequest(callsite, requests) match { + case Some(Right(req)) => requests += req + + case Some(Left(w)) => + if (w.emitWarning(compilerSettings)) { + backendReporting.inlinerWarning(callsite.callsitePosition, w.toString) + } + + case None => + if (callsiteWarning.isDefined && callsiteWarning.get.emitWarning(compilerSettings)) + backendReporting.inlinerWarning(pos, s"there was a problem determining if method ${callee.name} can be inlined: \n"+ callsiteWarning.get) + } + + case Callsite(ins, _, _, Left(warning), _, _, _, pos, _, _) => + if (warning.emitWarning(compilerSettings)) + backendReporting.inlinerWarning(pos, s"failed to determine if ${ins.name} should be inlined:\n$warning") + } + (methodNode, requests) + }).filterNot(_._2.isEmpty).toMap + } + + private def isTraitStaticSuperAccessorName(s: String) = s.endsWith("$") + private def traitStaticSuperAccessorName(s: String) = s + "$" + + private def isTraitSuperAccessor(method: MethodNode, owner: ClassBType): Boolean = { + owner.isInterface == Right(true) && BytecodeUtils.isStaticMethod(method) && isTraitStaticSuperAccessorName(method.name) + } + + private def findSingleCall(method: MethodNode, such: MethodInsnNode => Boolean): Option[MethodInsnNode] = { + @tailrec def noMoreInvoke(insn: AbstractInsnNode): Boolean = { + insn == null || (!insn.isInstanceOf[MethodInsnNode] && noMoreInvoke(insn.getNext)) + } + @tailrec def find(insn: AbstractInsnNode): Option[MethodInsnNode] = { + if (insn == null) None + else insn match { + case mi: MethodInsnNode => + if (such(mi) && noMoreInvoke(insn.getNext)) Some(mi) + else None + case _ => + find(insn.getNext) + } + } + find(method.instructions.getFirst) + } + private def superAccessorInvocation(method: MethodNode): Option[MethodInsnNode] = + findSingleCall(method, mi => mi.itf && mi.getOpcode == Opcodes.INVOKESTATIC && isTraitStaticSuperAccessorName(mi.name)) + + private def isMixinForwarder(method: MethodNode, owner: ClassBType): Boolean = { + owner.isInterface == Right(false) && + !BytecodeUtils.isStaticMethod(method) && + (superAccessorInvocation(method) match { + case Some(mi) => mi.name == traitStaticSuperAccessorName(method.name) + case _ => false + }) + } + + private def isTraitSuperAccessorOrMixinForwarder(method: MethodNode, owner: ClassBType): Boolean = { + isTraitSuperAccessor(method, owner) || isMixinForwarder(method, owner) + } + + + /** + * Returns the inline request for a callsite if the callsite should be inlined according to the + * current heuristics (`-Yopt-inline-heuristics`). + * + * The resulting inline request may contain post-inlining requests of callsites that in turn are + * also selected as individual inlining requests. + * + * @return `None` if this callsite should not be inlined according to the active heuristic + * `Some(Left)` if the callsite cannot be inlined (for example because that would cause + * an IllegalAccessError) but should be according to the heuristic + * TODO: what if a downstream inline request would cause an IAE and we don't create an + * InlineRequest for the original callsite? new subclass of OptimizerWarning. + * `Some(Right)` if the callsite should be and can be inlined + */ + def inlineRequest(callsite: Callsite, selectedRequestsForCallee: Set[InlineRequest]): Option[Either[OptimizerWarning, InlineRequest]] = { + def requestIfCanInline(callsite: Callsite, reason: String): Option[Either[OptimizerWarning, InlineRequest]] = { + val callee = callsite.callee.get + if (!callee.safeToInline) { + if (callsite.isInlineAnnotated && callee.canInlineFromSource) { + // By default, we only emit inliner warnings for methods annotated @inline. However, we don't + // want to be unnecessarily noisy with `-opt-warnings:_`: for example, the inliner heuristic + // would attempt to inline `Function1.apply$sp$II`, as it's higher-order (the receiver is + // a function), and it's concrete (forwards to `apply`). But because it's non-final, it cannot + // be inlined. So we only create warnings here for methods annotated @inline. + Some(Left(CalleeNotFinal( + callee.calleeDeclarationClass.internalName, + callee.callee.name, + callee.callee.desc, + callsite.isInlineAnnotated))) + } else None + } else inliner.earlyCanInlineCheck(callsite) match { + case Some(w) => Some(Left(w)) + case None => + val postInlineRequest: List[InlineRequest] = { + val postCall = + if (isTraitSuperAccessor(callee.callee, callee.calleeDeclarationClass)) { + // scala-dev#259: when inlining a trait super accessor, also inline the callsite to the default method + val implName = callee.callee.name.dropRight(1) + findSingleCall(callee.callee, mi => mi.itf && mi.getOpcode == Opcodes.INVOKESPECIAL && mi.name == implName) + } else { + // scala-dev#259: when inlining a mixin forwarder, also inline the callsite to the static super accessor + superAccessorInvocation(callee.callee) + } + postCall.flatMap(call => { + callGraph.addIfMissing(callee.callee, callee.calleeDeclarationClass) + val maybeCallsite = callGraph.findCallSite(callee.callee, call) + maybeCallsite.flatMap(requestIfCanInline(_, reason).flatMap(_.right.toOption)) + }).toList + } + Some(Right(InlineRequest(callsite, postInlineRequest, reason))) + } + } + + // scala-dev#259: don't inline into static accessors and mixin forwarders + if (isTraitSuperAccessorOrMixinForwarder(callsite.callsiteMethod, callsite.callsiteClass)) None + else { + val callee = callsite.callee.get + compilerSettings.YoptInlineHeuristics.value match { + case "everything" => + val reason = if (compilerSettings.YoptLogInline.isSetByUser) "the inline strategy is \"everything\"" else null + requestIfCanInline(callsite, reason) + + case "at-inline-annotated" => + def reason = if (!compilerSettings.YoptLogInline.isSetByUser) null else { + val what = if (callee.annotatedInline) "callee" else "callsite" + s"the $what is annotated `@inline`" + } + if (callsite.isInlineAnnotated && !callsite.isNoInlineAnnotated) requestIfCanInline(callsite, reason) + else None + + case "default" => + def reason = if (!compilerSettings.YoptLogInline.isSetByUser) null else { + if (callsite.isInlineAnnotated) { + val what = if (callee.annotatedInline) "callee" else "callsite" + s"the $what is annotated `@inline`" + } else { + val paramNames = Option(callee.callee.parameters).map(_.asScala.map(_.name).toVector) + def param(i: Int) = { + def syn = s"<param $i>" + paramNames.fold(syn)(v => v.applyOrElse(i, (_: Int) => syn)) + } + def samInfo(i: Int, sam: String, arg: String) = s"the argument for parameter (${param(i)}: $sam) is a $arg" + val argInfos = for ((i, sam) <- callee.samParamTypes; info <- callsite.argInfos.get(i)) yield { + val argKind = info match { + case FunctionLiteral => "function literal" + case ForwardedParam(_) => "parameter of the callsite method" + } + samInfo(i, sam.internalName.split('/').last, argKind) + } + s"the callee is a higher-order method, ${argInfos.mkString(", ")}" + } + } + def shouldInlineHO = callee.samParamTypes.nonEmpty && (callee.samParamTypes exists { + case (index, _) => callsite.argInfos.contains(index) + }) + if (!callsite.isNoInlineAnnotated && (callsite.isInlineAnnotated || shouldInlineHO)) requestIfCanInline(callsite, reason) + else None + } + } + } + + /* + // using http://lihaoyi.github.io/Ammonite/ + + load.ivy("com.google.guava" % "guava" % "18.0") + val javaUtilFunctionClasses = { + val rt = System.getProperty("sun.boot.class.path").split(":").find(_.endsWith("lib/rt.jar")).get + val u = new java.io.File(rt).toURL + val l = new java.net.URLClassLoader(Array(u)) + val cp = com.google.common.reflect.ClassPath.from(l) + cp.getTopLevelClasses("java.util.function").toArray.map(_.toString).toList + } + + // found using IntelliJ's "Find Usages" on the @FunctionalInterface annotation + val otherClasses = List( + "com.sun.javafx.css.parser.Recognizer", + "java.awt.KeyEventDispatcher", + "java.awt.KeyEventPostProcessor", + "java.io.FileFilter", + "java.io.FilenameFilter", + "java.lang.Runnable", + "java.lang.Thread$UncaughtExceptionHandler", + "java.nio.file.DirectoryStream$Filter", + "java.nio.file.PathMatcher", + "java.time.temporal.TemporalAdjuster", + "java.time.temporal.TemporalQuery", + "java.util.Comparator", + "java.util.concurrent.Callable", + "java.util.logging.Filter", + "java.util.prefs.PreferenceChangeListener", + "javafx.animation.Interpolatable", + "javafx.beans.InvalidationListener", + "javafx.beans.value.ChangeListener", + "javafx.collections.ListChangeListener", + "javafx.collections.MapChangeListener", + "javafx.collections.SetChangeListener", + "javafx.event.EventHandler", + "javafx.util.Builder", + "javafx.util.BuilderFactory", + "javafx.util.Callback" + ) + + val allClasses = javaUtilFunctionClasses ::: otherClasses + + load.ivy("org.ow2.asm" % "asm" % "5.0.4") + val classesAndSamNameDesc = allClasses.map(c => { + val cls = Class.forName(c) + val internalName = org.objectweb.asm.Type.getDescriptor(cls).drop(1).dropRight(1) // drop L and ; + val sams = cls.getMethods.filter(m => { + (m.getModifiers & java.lang.reflect.Modifier.ABSTRACT) != 0 && + m.getName != "equals" // Comparator has an abstract override of "equals" for adding Javadoc + }) + assert(sams.size == 1, internalName + sams.map(_.getName)) + val sam = sams.head + val samDesc = org.objectweb.asm.Type.getMethodDescriptor(sam) + (internalName, sam.getName, samDesc) + }) + println(classesAndSamNameDesc map { + case (cls, nme, desc) => s"""("$cls", "$nme$desc")""" + } mkString ("", ",\n", "\n")) + */ + private val javaSams: Map[String, String] = Map( + ("java/util/function/BiConsumer", "accept(Ljava/lang/Object;Ljava/lang/Object;)V"), + ("java/util/function/BiFunction", "apply(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;"), + ("java/util/function/BiPredicate", "test(Ljava/lang/Object;Ljava/lang/Object;)Z"), + ("java/util/function/BinaryOperator", "apply(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;"), + ("java/util/function/BooleanSupplier", "getAsBoolean()Z"), + ("java/util/function/Consumer", "accept(Ljava/lang/Object;)V"), + ("java/util/function/DoubleBinaryOperator", "applyAsDouble(DD)D"), + ("java/util/function/DoubleConsumer", "accept(D)V"), + ("java/util/function/DoubleFunction", "apply(D)Ljava/lang/Object;"), + ("java/util/function/DoublePredicate", "test(D)Z"), + ("java/util/function/DoubleSupplier", "getAsDouble()D"), + ("java/util/function/DoubleToIntFunction", "applyAsInt(D)I"), + ("java/util/function/DoubleToLongFunction", "applyAsLong(D)J"), + ("java/util/function/DoubleUnaryOperator", "applyAsDouble(D)D"), + ("java/util/function/Function", "apply(Ljava/lang/Object;)Ljava/lang/Object;"), + ("java/util/function/IntBinaryOperator", "applyAsInt(II)I"), + ("java/util/function/IntConsumer", "accept(I)V"), + ("java/util/function/IntFunction", "apply(I)Ljava/lang/Object;"), + ("java/util/function/IntPredicate", "test(I)Z"), + ("java/util/function/IntSupplier", "getAsInt()I"), + ("java/util/function/IntToDoubleFunction", "applyAsDouble(I)D"), + ("java/util/function/IntToLongFunction", "applyAsLong(I)J"), + ("java/util/function/IntUnaryOperator", "applyAsInt(I)I"), + ("java/util/function/LongBinaryOperator", "applyAsLong(JJ)J"), + ("java/util/function/LongConsumer", "accept(J)V"), + ("java/util/function/LongFunction", "apply(J)Ljava/lang/Object;"), + ("java/util/function/LongPredicate", "test(J)Z"), + ("java/util/function/LongSupplier", "getAsLong()J"), + ("java/util/function/LongToDoubleFunction", "applyAsDouble(J)D"), + ("java/util/function/LongToIntFunction", "applyAsInt(J)I"), + ("java/util/function/LongUnaryOperator", "applyAsLong(J)J"), + ("java/util/function/ObjDoubleConsumer", "accept(Ljava/lang/Object;D)V"), + ("java/util/function/ObjIntConsumer", "accept(Ljava/lang/Object;I)V"), + ("java/util/function/ObjLongConsumer", "accept(Ljava/lang/Object;J)V"), + ("java/util/function/Predicate", "test(Ljava/lang/Object;)Z"), + ("java/util/function/Supplier", "get()Ljava/lang/Object;"), + ("java/util/function/ToDoubleBiFunction", "applyAsDouble(Ljava/lang/Object;Ljava/lang/Object;)D"), + ("java/util/function/ToDoubleFunction", "applyAsDouble(Ljava/lang/Object;)D"), + ("java/util/function/ToIntBiFunction", "applyAsInt(Ljava/lang/Object;Ljava/lang/Object;)I"), + ("java/util/function/ToIntFunction", "applyAsInt(Ljava/lang/Object;)I"), + ("java/util/function/ToLongBiFunction", "applyAsLong(Ljava/lang/Object;Ljava/lang/Object;)J"), + ("java/util/function/ToLongFunction", "applyAsLong(Ljava/lang/Object;)J"), + ("java/util/function/UnaryOperator", "apply(Ljava/lang/Object;)Ljava/lang/Object;"), + ("com/sun/javafx/css/parser/Recognizer", "recognize(I)Z"), + ("java/awt/KeyEventDispatcher", "dispatchKeyEvent(Ljava/awt/event/KeyEvent;)Z"), + ("java/awt/KeyEventPostProcessor", "postProcessKeyEvent(Ljava/awt/event/KeyEvent;)Z"), + ("java/io/FileFilter", "accept(Ljava/io/File;)Z"), + ("java/io/FilenameFilter", "accept(Ljava/io/File;Ljava/lang/String;)Z"), + ("java/lang/Runnable", "run()V"), + ("java/lang/Thread$UncaughtExceptionHandler", "uncaughtException(Ljava/lang/Thread;Ljava/lang/Throwable;)V"), + ("java/nio/file/DirectoryStream$Filter", "accept(Ljava/lang/Object;)Z"), + ("java/nio/file/PathMatcher", "matches(Ljava/nio/file/Path;)Z"), + ("java/time/temporal/TemporalAdjuster", "adjustInto(Ljava/time/temporal/Temporal;)Ljava/time/temporal/Temporal;"), + ("java/time/temporal/TemporalQuery", "queryFrom(Ljava/time/temporal/TemporalAccessor;)Ljava/lang/Object;"), + ("java/util/Comparator", "compare(Ljava/lang/Object;Ljava/lang/Object;)I"), + ("java/util/concurrent/Callable", "call()Ljava/lang/Object;"), + ("java/util/logging/Filter", "isLoggable(Ljava/util/logging/LogRecord;)Z"), + ("java/util/prefs/PreferenceChangeListener", "preferenceChange(Ljava/util/prefs/PreferenceChangeEvent;)V"), + ("javafx/animation/Interpolatable", "interpolate(Ljava/lang/Object;D)Ljava/lang/Object;"), + ("javafx/beans/InvalidationListener", "invalidated(Ljavafx/beans/Observable;)V"), + ("javafx/beans/value/ChangeListener", "changed(Ljavafx/beans/value/ObservableValue;Ljava/lang/Object;Ljava/lang/Object;)V"), + ("javafx/collections/ListChangeListener", "onChanged(Ljavafx/collections/ListChangeListener$Change;)V"), + ("javafx/collections/MapChangeListener", "onChanged(Ljavafx/collections/MapChangeListener$Change;)V"), + ("javafx/collections/SetChangeListener", "onChanged(Ljavafx/collections/SetChangeListener$Change;)V"), + ("javafx/event/EventHandler", "handle(Ljavafx/event/Event;)V"), + ("javafx/util/Builder", "build()Ljava/lang/Object;"), + ("javafx/util/BuilderFactory", "getBuilder(Ljava/lang/Class;)Ljavafx/util/Builder;"), + ("javafx/util/Callback", "call(Ljava/lang/Object;)Ljava/lang/Object;") + ) + def javaSam(internalName: InternalName): Option[String] = javaSams.get(internalName) +} diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/InstructionResultSize.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/InstructionResultSize.scala deleted file mode 100644 index 8d744f6d13..0000000000 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/InstructionResultSize.scala +++ /dev/null @@ -1,240 +0,0 @@ -package scala.tools.nsc.backend.jvm.opt - -import scala.annotation.switch -import scala.tools.asm.{Handle, Type, Opcodes} -import scala.tools.asm.tree._ - -object InstructionResultSize { - import Opcodes._ - def apply(instruction: AbstractInsnNode): Int = (instruction.getOpcode: @switch) match { - // The order of opcodes is (almost) the same as in Opcodes.java - case ACONST_NULL => 1 - - case ICONST_M1 | - ICONST_0 | - ICONST_1 | - ICONST_2 | - ICONST_3 | - ICONST_4 | - ICONST_5 => 1 - - case LCONST_0 | - LCONST_1 => 2 - - case FCONST_0 | - FCONST_1 | - FCONST_2 => 1 - - case DCONST_0 | - DCONST_1 => 2 - - case BIPUSH | - SIPUSH => 1 - - case LDC => - instruction.asInstanceOf[LdcInsnNode].cst match { - case _: java.lang.Integer | - _: java.lang.Float | - _: String | - _: Type | - _: Handle => 1 - - case _: java.lang.Long | - _: java.lang.Double => 2 - } - - case ILOAD | - FLOAD | - ALOAD => 1 - - case LLOAD | - DLOAD => 2 - - case IALOAD | - FALOAD | - AALOAD | - BALOAD | - CALOAD | - SALOAD => 1 - - case LALOAD | - DALOAD => 2 - - case ISTORE | - LSTORE | - FSTORE | - DSTORE | - ASTORE => 0 - - case IASTORE | - LASTORE | - FASTORE | - DASTORE | - AASTORE | - BASTORE | - CASTORE | - SASTORE => 0 - - case POP | - POP2 => 0 - - case DUP | - DUP_X1 | - DUP_X2 | - DUP2 | - DUP2_X1 | - DUP2_X2 | - SWAP => throw new IllegalArgumentException("Can't compute the size of DUP/SWAP without knowing what's on stack top") - - case IADD | - FADD => 1 - - case LADD | - DADD => 2 - - case ISUB | - FSUB => 1 - - case LSUB | - DSUB => 2 - - case IMUL | - FMUL => 1 - - case LMUL | - DMUL => 2 - - case IDIV | - FDIV => 1 - - case LDIV | - DDIV => 2 - - case IREM | - FREM => 1 - - case LREM | - DREM => 2 - - case INEG | - FNEG => 1 - - case LNEG | - DNEG => 2 - - case ISHL | - ISHR => 1 - - case LSHL | - LSHR => 2 - - case IUSHR => 1 - - case LUSHR => 2 - - case IAND | - IOR | - IXOR => 1 - - case LAND | - LOR | - LXOR => 2 - - case IINC => 1 - - case I2F | - L2I | - L2F | - F2I | - D2I | - D2F | - I2B | - I2C | - I2S => 1 - - case I2L | - I2D | - L2D | - F2L | - F2D | - D2L => 2 - - case LCMP | - FCMPL | - FCMPG | - DCMPL | - DCMPG => 1 - - case IFEQ | - IFNE | - IFLT | - IFGE | - IFGT | - IFLE => 0 - - case IF_ICMPEQ | - IF_ICMPNE | - IF_ICMPLT | - IF_ICMPGE | - IF_ICMPGT | - IF_ICMPLE | - IF_ACMPEQ | - IF_ACMPNE => 0 - - case GOTO => 0 - - case JSR => throw new IllegalArgumentException("Subroutines are not supported.") - - case RET => 0 - - case TABLESWITCH | - LOOKUPSWITCH => 0 - - case IRETURN | - FRETURN | - ARETURN => 1 - - case LRETURN | - DRETURN => 2 - - case RETURN => 0 - - case GETSTATIC => Type.getType(instruction.asInstanceOf[FieldInsnNode].desc).getSize - - case PUTSTATIC => 0 - - case GETFIELD => Type.getType(instruction.asInstanceOf[FieldInsnNode].desc).getSize - - case PUTFIELD => 0 - - case INVOKEVIRTUAL | - INVOKESPECIAL | - INVOKESTATIC | - INVOKEINTERFACE => - val desc = instruction.asInstanceOf[MethodInsnNode].desc - Type.getReturnType(desc).getSize - - case INVOKEDYNAMIC => - val desc = instruction.asInstanceOf[InvokeDynamicInsnNode].desc - Type.getReturnType(desc).getSize - - case NEW => 1 - - case NEWARRAY | - ANEWARRAY | - ARRAYLENGTH => 1 - - case ATHROW => 0 - - case CHECKCAST | - INSTANCEOF => 1 - - case MONITORENTER | - MONITOREXIT => 0 - - case MULTIANEWARRAY => 1 - - case IFNULL | - IFNONNULL => 0 - } -} diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala index 4132710a96..9c22b09cdd 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala @@ -7,79 +7,180 @@ package scala.tools.nsc package backend.jvm package opt -import scala.annotation.switch -import scala.tools.asm.Opcodes -import scala.tools.asm.tree.analysis.{Analyzer, BasicInterpreter} +import scala.annotation.{tailrec, switch} + +import scala.tools.asm.Type +import scala.tools.asm.tree.analysis.Frame +import scala.tools.asm.Opcodes._ import scala.tools.asm.tree._ -import scala.collection.convert.decorateAsScala._ +import scala.collection.mutable +import scala.collection.JavaConverters._ import scala.tools.nsc.backend.jvm.BTypes.InternalName +import scala.tools.nsc.backend.jvm.analysis._ import scala.tools.nsc.backend.jvm.opt.BytecodeUtils._ /** - * Optimizations within a single method. + * Optimizations within a single method. Certain optimizations enable others, for example removing + * unreachable code can render a `try` block empty and enable removeEmptyExceptionHandlers. The + * latter in turn enables more unreachable code to be eliminated (the `catch` block), so there is + * a cyclic dependency. Optimizations that depend on each other are therefore executed in a loop + * until reaching a fixpoint. + * + * The optimizations marked UPSTREAM enable optimizations that were already executed, so they cause + * another iteration in the fixpoint loop. + * + * nullness optimizations: rewrite null-checking branches to GOTO if nullness is known + * + enables downstream + * - unreachable code (null / non-null branch becomes unreachable) + * - box-unbox elimination (may render an escaping consumer of a box unreachable) + * - stale stores (aload x is replaced by aconst_null if it's known null) + * - simplify jumps (replaces conditional jumps by goto, so may enable goto chains) + * + * unreachable code / DCE (removes instructions of basic blocks to which there is no branch) + * + enables downstream: + * - stale stores (loads may be eliminated, removing consumers of a store) + * - empty handlers (try blocks may become empty) + * - simplify jumps (goto l; [dead code]; l: ..) => remove goto + * - stale local variable descriptors + * - (not box-unbox, which is implemented using prod-cons, so it doesn't consider dead code) + * + * note that eliminating empty handlers and stale local variable descriptors is required for + * correctness, see the comment in the body of `methodOptimizations`. + * + * box-unbox elimination (eliminates box-unbox pairs within the same method) + * + enables UPSTREAM: + * - nullness optimizations (a box extraction operation (unknown nullness) may be rewritten to + * a read of a non-null local. example in doc comment of box-unbox implementation) + * - further box-unbox elimination (e.g. an Integer stored in a Tuple; eliminating the tuple may + * enable eliminating the Integer) + * + enables downstream: + * - copy propagation (new locals are introduced, may be aliases of existing) + * - stale stores (multi-value boxes where not all values are used) + * - redundant casts (`("a", "b")._1`: the generic `_1` method returns `Object`, a cast + * to String is added. The cast is redundant after eliminating the tuple.) + * - empty local variable descriptors (local variables that were holding the box may become unused) + * + * copy propagation (replaces LOAD n to the LOAD m for the smallest m that is an alias of n) + * + enables downstream: + * - stale stores (a stored value may not be loaded anymore) + * - store-load pairs (a load n may now be right after a store n) + * + NOTE: copy propagation is only executed once, in the first fixpoint loop iteration. none of + * the other optimizations enables further copy prop. we still run it as part of the loop + * because it requires unreachable code to be eliminated. + * + * stale stores (replace STORE by POP) + * + enables downstream: + * - push-pop (the new pop may be the single consumer for an instruction) + * + * redundant casts: eliminates casts that are statically known to succeed (uses type propagation) + * + enables UPSTREAM: + * - box-unbox elimination (a removed checkcast may be a box consumer) + * + enables downstream: + * - push-pop for closure allocation elimination (every indyLambda is followed by a checkcast, see SI-9540) + * + * push-pop (when a POP is the only consumer of a value, remove the POP and its producer) + * + enables UPSTREAM: + * - stale stores (if a LOAD is removed, a corresponding STORE may become stale) + * - box-unbox elimination (push-pop may eliminate a closure allocation, rendering a captured + * box non-escaping) + * + enables downstream: + * - store-load pairs (a variable may become non-live) + * - stale handlers (push-pop removes code) + * - simplify jumps (push-pop removes code) + * + * store-load pairs (remove `STORE x; LOAD x` if x is otherwise not used in the method) + * + enables downstream: + * - empty handlers (code is removes, a try block may become empty + * - simplify jumps (code is removed, a goto may become redundant for example) + * - stale local variable descriptors * - * unreachable code - * - removes instructions of basic blocks to which no branch instruction points - * + enables eliminating some exception handlers and local variable descriptors - * > eliminating them is required for correctness, as explained in `removeUnreachableCode` + * empty handlers (removes exception handlers whose try block is empty) + * + enables UPSTREAM: + * - unreachable code (catch block becomes unreachable) + * - box-unbox (a box may be escape in an operation in a dead handler) + * + enables downstream: + * - simplify jumps * - * empty exception handlers - * - removes exception handlers whose try block is empty - * + eliminating a handler where the try block is empty and reachable will turn the catch block - * unreachable. in this case "unreachable code" is invoked recursively until reaching a fixpoint. - * > for try blocks that are unreachable, "unreachable code" removes also the instructions of the - * catch block, and the recursive invocation is not necessary. + * simplify jumps (various, like `GOTO l; l: ...`, see doc comments of individual optimizations) + * + enables UPSTREAM + * - unreachable code (`GOTO a; a: GOTO b; b: ...`, the first jump is changed to `GOTO b`, the second becomes unreachable) + * - store-load pairs (a `GOTO l; l: ...` is removed between store and load) + * - push-pop (`IFNULL l; l: ...` is replaced by `POP`) * - * simplify jumps - * - various simplifications, see doc comments of individual optimizations - * + changing or eliminating jumps may render some code unreachable, therefore "simplify jumps" is - * executed in a loop with "unreachable code" * - * empty local variable descriptors - * - removes entries from the local variable table where the variable is not actually used - * + enables eliminating labels that the entry points to (if they are not otherwise referenced) + * The following cleanup optimizations don't enable any upstream optimizations, so they can be + * executed once at the end, when the above optimizations reach a fixpoint. * - * empty line numbers - * - eliminates line number nodes that describe no executable instructions - * + enables eliminating the label of the line number node (if it's not otherwise referenced) * - * stale labels - * - eliminate labels that are not referenced, merge sequences of label definitions. + * empty local variable descriptors (removes unused variables from the local variable table) + * + enables downstream: + * - stale labels (labels that the entry points to, if not otherwise referenced) + * + * empty line numbers (eliminates line number nodes that describe no executable instructions) + * + enables downstream: + * - stale labels (label of the line number node, if not otherwise referenced) + * + * stale labels (eliminate labels that are not referenced, merge sequences of label definitions) + * + * + * Note on a method's maxLocals / maxStack: the backend only uses those values for running + * Analyzers. The values can be conservative approximations: if an optimization removes code and + * the maximal stack size is now smaller, the larger maxStack value will still work fine for + * running an Analyzer (just that frames allocate more space than required). The correct max + * values written to the bytecode are re-computed during classfile serialization. + * To keep things simpler, we don't update the max values in every optimization: + * - we do it in `removeUnreachableCodeImpl`, because it's quite straightforward + * - maxLocals is updated in `compactLocalVariables`, which runs at the end of method optimizations + * + * + * Note on updating the call graph: whenever an optimization eliminates a callsite or a closure + * instantiation, we eliminate the corresponding entry from the call graph. */ class LocalOpt[BT <: BTypes](val btypes: BT) { import LocalOptImpls._ import btypes._ + import coreBTypes._ + import backendUtils._ + + val boxUnbox = new BoxUnbox(btypes) + import boxUnbox._ + + val copyProp = new CopyProp(btypes) + import copyProp._ /** * Remove unreachable code from a method. * * This implementation only removes instructions that are unreachable for an ASM analyzer / * interpreter. This ensures that future analyses will not produce `null` frames. The inliner - * and call graph builder depend on this property. + * depends on this property. * * @return A set containing the eliminated instructions */ - def minimalRemoveUnreachableCode(method: MethodNode, ownerClassName: InternalName): Set[AbstractInsnNode] = { - if (method.instructions.size == 0) return Set.empty // fast path for abstract methods - if (unreachableCodeEliminated(method)) return Set.empty // we know there is no unreachable code + def minimalRemoveUnreachableCode(method: MethodNode, ownerClassName: InternalName): Boolean = { + // In principle, for the inliner, a single removeUnreachableCodeImpl would be enough. But that + // would potentially leave behind stale handlers (empty try block) which is not legal in the + // classfile. So we run both removeUnreachableCodeImpl and removeEmptyExceptionHandlers. + if (method.instructions.size == 0) return false // fast path for abstract methods + if (unreachableCodeEliminated(method)) return false // we know there is no unreachable code + if (!AsmAnalyzer.sizeOKForBasicValue(method)) return false // the method is too large for running an analyzer // For correctness, after removing unreachable code, we have to eliminate empty exception // handlers, see scaladoc of def methodOptimizations. Removing an live handler may render more // code unreachable and therefore requires running another round. - def removalRound(): Set[AbstractInsnNode] = { - val (removedInstructions, liveLabels) = removeUnreachableCodeImpl(method, ownerClassName) - val removedRecursively = if (removedInstructions.nonEmpty) { + def removalRound(): Boolean = { + val (insnsRemoved, liveLabels) = removeUnreachableCodeImpl(method, ownerClassName) + if (insnsRemoved) { val liveHandlerRemoved = removeEmptyExceptionHandlers(method).exists(h => liveLabels(h.start)) if (liveHandlerRemoved) removalRound() - else Set.empty - } else Set.empty - removedInstructions ++ removedRecursively + } + insnsRemoved } - val removedInstructions = removalRound() - if (removedInstructions.nonEmpty) removeUnusedLocalVariableNodes(method)() + val changed = removalRound() + if (changed) removeUnusedLocalVariableNodes(method)() unreachableCodeEliminated += method - removedInstructions + changed } /** @@ -90,21 +191,13 @@ class LocalOpt[BT <: BTypes](val btypes: BT) { * @return `true` if unreachable code was eliminated in some method, `false` otherwise. */ def methodOptimizations(clazz: ClassNode): Boolean = { - !compilerSettings.YoptNone && clazz.methods.asScala.foldLeft(false) { + !compilerSettings.optNone && clazz.methods.asScala.foldLeft(false) { case (changed, method) => methodOptimizations(method, clazz.name) || changed } } /** - * Remove unreachable code from a method. - * - * We rely on dead code elimination provided by the ASM framework, as described in the ASM User - * Guide (http://asm.ow2.org/index.html), Section 8.2.1. It runs a data flow analysis, which only - * computes Frame information for reachable instructions. Instructions for which no Frame data is - * available after the analysis are unreachable. - * - * Also simplifies branching instructions, removes unused local variable descriptors, empty - * exception handlers, unnecessary label declarations and empty line number nodes. + * Run method-level optimizations, see comment on class [[LocalOpt]]. * * Returns `true` if the bytecode of `method` was changed. */ @@ -137,36 +230,151 @@ class LocalOpt[BT <: BTypes](val btypes: BT) { // This triggers "ClassFormatError: Illegal exception table range in class file C". Similar // for local variables in dead blocks. Maybe that's a bug in the ASM framework. - def removalRound(): Boolean = { - // unreachable-code, empty-handlers and simplify-jumps run until reaching a fixpoint (see doc on class LocalOpt) - val (codeRemoved, handlersRemoved, liveHandlerRemoved) = if (compilerSettings.YoptUnreachableCode) { - val (removedInstructions, liveLabels) = removeUnreachableCodeImpl(method, ownerClassName) - val removedHandlers = removeEmptyExceptionHandlers(method) - (removedInstructions.nonEmpty, removedHandlers.nonEmpty, removedHandlers.exists(h => liveLabels(h.start))) - } else { - (false, false, false) + var currentTrace: String = null + val methodPrefix = {val p = compilerSettings.YoptTrace.value; if (p == "_") "" else p } + val doTrace = compilerSettings.YoptTrace.isSetByUser && s"$ownerClassName.${method.name}".startsWith(methodPrefix) + def traceIfChanged(optName: String): Unit = if (doTrace) { + val after = AsmUtils.textify(method) + if (currentTrace != after) { + println(s"after $optName") + println(after) } - - val jumpsChanged = if (compilerSettings.YoptSimplifyJumps) simplifyJumps(method) else false - - // Eliminating live handlers and simplifying jump instructions may render more code - // unreachable, so we need to run another round. - if (liveHandlerRemoved || jumpsChanged) removalRound() - - codeRemoved || handlersRemoved || jumpsChanged + currentTrace = after } - val codeHandlersOrJumpsChanged = removalRound() + /** + * Runs the optimizations that depend on each other in a loop until reaching a fixpoint. See + * comment in class [[LocalOpt]]. + * + * Returns a pair of booleans (codeChanged, requireEliminateUnusedLocals). + */ + def removalRound( + requestNullness: Boolean, + requestDCE: Boolean, + requestBoxUnbox: Boolean, + requestStaleStores: Boolean, + requestPushPop: Boolean, + requestStoreLoad: Boolean, + firstIteration: Boolean, + maxRecursion: Int = 10): (Boolean, Boolean) = { + if (maxRecursion == 0) return (false, false) + + traceIfChanged("beforeMethodOpt") + + // NULLNESS OPTIMIZATIONS + val runNullness = compilerSettings.optNullnessTracking && requestNullness + val nullnessOptChanged = runNullness && nullnessOptimizations(method, ownerClassName) + traceIfChanged("nullness") + + // UNREACHABLE CODE + // Both AliasingAnalyzer (used in copyProp) and ProdConsAnalyzer (used in eliminateStaleStores, + // boxUnboxElimination) require not having unreachable instructions (null frames). + val runDCE = (compilerSettings.optUnreachableCode && (requestDCE || nullnessOptChanged)) || + compilerSettings.optBoxUnbox || + compilerSettings.optCopyPropagation + val (codeRemoved, liveLabels) = if (runDCE) removeUnreachableCodeImpl(method, ownerClassName) else (false, Set.empty[LabelNode]) + traceIfChanged("dce") + + // BOX-UNBOX + val runBoxUnbox = compilerSettings.optBoxUnbox && (requestBoxUnbox || nullnessOptChanged) + val boxUnboxChanged = runBoxUnbox && boxUnboxElimination(method, ownerClassName) + traceIfChanged("boxUnbox") + + // COPY PROPAGATION + val runCopyProp = compilerSettings.optCopyPropagation && (firstIteration || boxUnboxChanged) + val copyPropChanged = runCopyProp && copyPropagation(method, ownerClassName) + traceIfChanged("copyProp") + + // STALE STORES + val runStaleStores = compilerSettings.optCopyPropagation && (requestStaleStores || nullnessOptChanged || codeRemoved || boxUnboxChanged || copyPropChanged) + val storesRemoved = runStaleStores && eliminateStaleStores(method, ownerClassName) + traceIfChanged("staleStores") + + // REDUNDANT CASTS + val runRedundantCasts = compilerSettings.optRedundantCasts && (firstIteration || boxUnboxChanged) + val castRemoved = runRedundantCasts && eliminateRedundantCasts(method, ownerClassName) + traceIfChanged("redundantCasts") + + // PUSH-POP + val runPushPop = compilerSettings.optCopyPropagation && (requestPushPop || firstIteration || storesRemoved || castRemoved) + val pushPopRemoved = runPushPop && eliminatePushPop(method, ownerClassName) + traceIfChanged("pushPop") + + // STORE-LOAD PAIRS + val runStoreLoad = compilerSettings.optCopyPropagation && (requestStoreLoad || boxUnboxChanged || copyPropChanged || pushPopRemoved) + val storeLoadRemoved = runStoreLoad && eliminateStoreLoad(method) + traceIfChanged("storeLoadPairs") + + // STALE HANDLERS + val removedHandlers = if (runDCE) removeEmptyExceptionHandlers(method) else Set.empty[TryCatchBlockNode] + val handlersRemoved = removedHandlers.nonEmpty + val liveHandlerRemoved = removedHandlers.exists(h => liveLabels(h.start)) + traceIfChanged("staleHandlers") + + // SIMPLIFY JUMPS + // almost all of the above optimizations enable simplifying more jumps, so we just run it in every iteration + val runSimplifyJumps = compilerSettings.optSimplifyJumps + val jumpsChanged = runSimplifyJumps && simplifyJumps(method) + traceIfChanged("simplifyJumps") + + // See doc comment in the beginning of this file (optimizations marked UPSTREAM) + val runNullnessAgain = boxUnboxChanged + val runDCEAgain = liveHandlerRemoved || jumpsChanged + val runBoxUnboxAgain = boxUnboxChanged || castRemoved || pushPopRemoved || liveHandlerRemoved + val runStaleStoresAgain = pushPopRemoved + val runPushPopAgain = jumpsChanged + val runStoreLoadAgain = jumpsChanged + val runAgain = runNullnessAgain || runDCEAgain || runBoxUnboxAgain || pushPopRemoved || runStaleStoresAgain || runPushPopAgain || runStoreLoadAgain + + val downstreamRequireEliminateUnusedLocals = runAgain && removalRound( + requestNullness = runNullnessAgain, + requestDCE = runDCEAgain, + requestBoxUnbox = runBoxUnboxAgain, + requestStaleStores = runStaleStoresAgain, + requestPushPop = runPushPopAgain, + requestStoreLoad = runStoreLoadAgain, + firstIteration = false, + maxRecursion = maxRecursion - 1)._2 + + val requireEliminateUnusedLocals = downstreamRequireEliminateUnusedLocals || + nullnessOptChanged || // nullness opt may eliminate stores / loads, rendering a local unused + codeRemoved || // see comment in method `methodOptimizations` + boxUnboxChanged || // box-unbox renders locals (holding boxes) unused + storesRemoved || + storeLoadRemoved || + handlersRemoved + + val codeChanged = nullnessOptChanged || codeRemoved || boxUnboxChanged || castRemoved || copyPropChanged || storesRemoved || pushPopRemoved || storeLoadRemoved || handlersRemoved || jumpsChanged + (codeChanged, requireEliminateUnusedLocals) + } - // (*) Removing stale local variable descriptors is required for correctness of unreachable-code + val (nullnessDceBoxesCastsCopypropPushpopOrJumpsChanged, requireEliminateUnusedLocals) = if (AsmAnalyzer.sizeOKForBasicValue(method)) { + // we run DCE even if the method is already in the `unreachableCodeEliminated` map: the DCE + // here is more thorough than `minimalRemoveUnreachableCode` that run before inlining. + val r = removalRound( + requestNullness = true, + requestDCE = true, + requestBoxUnbox = true, + requestStaleStores = true, + requestPushPop = true, + requestStoreLoad = true, + firstIteration = true) + if (compilerSettings.optUnreachableCode) unreachableCodeEliminated += method + r + } else (false, false) + + // (*) Removing stale local variable descriptors is required for correctness, see comment in `methodOptimizations` val localsRemoved = - if (compilerSettings.YoptCompactLocals) compactLocalVariables(method) // also removes unused - else if (compilerSettings.YoptUnreachableCode) removeUnusedLocalVariableNodes(method)() // (*) + if (compilerSettings.optCompactLocals) compactLocalVariables(method) // also removes unused + else if (requireEliminateUnusedLocals) removeUnusedLocalVariableNodes(method)() // (*) else false + traceIfChanged("localVariables") - val lineNumbersRemoved = if (compilerSettings.YoptEmptyLineNumbers) removeEmptyLineNumbers(method) else false + val lineNumbersRemoved = if (compilerSettings.optUnreachableCode) removeEmptyLineNumbers(method) else false + traceIfChanged("lineNumbers") - val labelsRemoved = if (compilerSettings.YoptEmptyLabels) removeEmptyLabelNodes(method) else false + val labelsRemoved = if (compilerSettings.optUnreachableCode) removeEmptyLabelNodes(method) else false + traceIfChanged("labels") // assert that local variable annotations are empty (we don't emit them) - otherwise we'd have // to eliminate those covering an empty range, similar to removeUnusedLocalVariableNodes. @@ -174,53 +382,198 @@ class LocalOpt[BT <: BTypes](val btypes: BT) { assert(nullOrEmpty(method.visibleLocalVariableAnnotations), method.visibleLocalVariableAnnotations) assert(nullOrEmpty(method.invisibleLocalVariableAnnotations), method.invisibleLocalVariableAnnotations) - unreachableCodeEliminated += method - - codeHandlersOrJumpsChanged || localsRemoved || lineNumbersRemoved || labelsRemoved + nullnessDceBoxesCastsCopypropPushpopOrJumpsChanged || localsRemoved || lineNumbersRemoved || labelsRemoved } -} + /** + * Apply various optimizations based on nullness analysis information. + * - IFNULL / IFNONNULL are rewritten to GOTO if nullness is known + * - IF_ACMPEQ / IF_ACMPNE are rewritten to GOTO if the both references are known null, or if + * one is known null and the other known not-null + * - ALOAD is replaced by ACONST_NULL if the local is known to hold null + * - ASTORE of null is removed if the local is known to hold null + * - INSTANCEOF of null is replaced by `ICONST_0` + * - scala.runtime.BoxesRunTime.unboxToX(null) is rewritten to a zero-value load + */ + def nullnessOptimizations(method: MethodNode, ownerClassName: InternalName): Boolean = { + AsmAnalyzer.sizeOKForNullness(method) && { + lazy val nullnessAnalyzer = new AsmAnalyzer(method, ownerClassName, new NullnessAnalyzer(btypes, method)) + + // When running nullness optimizations the method may still have unreachable code. Analyzer + // frames of unreachable instructions are `null`. + def frameAt(insn: AbstractInsnNode): Option[Frame[NullnessValue]] = Option(nullnessAnalyzer.frameAt(insn)) + + def nullness(insn: AbstractInsnNode, slot: Int): Option[NullnessValue] = { + frameAt(insn).map(_.getValue(slot)) + } + + def isNull(insn: AbstractInsnNode, slot: Int) = nullness(insn, slot).contains(NullValue) + + // cannot change instructions while iterating, it gets the analysis out of synch (indexed by instructions) + val toReplace = mutable.Map.empty[AbstractInsnNode, List[AbstractInsnNode]] + + val it = method.instructions.iterator() + while (it.hasNext) it.next() match { + case vi: VarInsnNode if isNull(vi, vi.`var`) => + if (vi.getOpcode == ALOAD) + toReplace(vi) = List(new InsnNode(ACONST_NULL)) + else if (vi.getOpcode == ASTORE) + for (frame <- frameAt(vi) if frame.peekStack(0) == NullValue) + toReplace(vi) = List(getPop(1)) + + case ji: JumpInsnNode => + val isIfNull = ji.getOpcode == IFNULL + val isIfNonNull = ji.getOpcode == IFNONNULL + if (isIfNull || isIfNonNull) for (frame <- frameAt(ji)) { + val nullness = frame.peekStack(0) + val taken = nullness == NullValue && isIfNull || nullness == NotNullValue && isIfNonNull + val avoided = nullness == NotNullValue && isIfNull || nullness == NullValue && isIfNonNull + if (taken || avoided) { + val jump = if (taken) List(new JumpInsnNode(GOTO, ji.label)) else Nil + toReplace(ji) = getPop(1) :: jump + } + } else { + val isIfEq = ji.getOpcode == IF_ACMPEQ + val isIfNe = ji.getOpcode == IF_ACMPNE + if (isIfEq || isIfNe) for (frame <- frameAt(ji)) { + val aNullness = frame.peekStack(1) + val bNullness = frame.peekStack(0) + val eq = aNullness == NullValue && bNullness == NullValue + val ne = aNullness == NullValue && bNullness == NotNullValue || aNullness == NotNullValue && bNullness == NullValue + val taken = isIfEq && eq || isIfNe && ne + val avoided = isIfEq && ne || isIfNe && eq + if (taken || avoided) { + val jump = if (taken) List(new JumpInsnNode(GOTO, ji.label)) else Nil + toReplace(ji) = getPop(1) :: getPop(1) :: jump + } + } + } + + case ti: TypeInsnNode => + if (ti.getOpcode == INSTANCEOF) for (frame <- frameAt(ti) if frame.peekStack(0) == NullValue) { + toReplace(ti) = List(getPop(1), new InsnNode(ICONST_0)) + } + + case mi: MethodInsnNode => + if (isScalaUnbox(mi)) for (frame <- frameAt(mi) if frame.peekStack(0) == NullValue) { + toReplace(mi) = List( + getPop(1), + loadZeroForTypeSort(Type.getReturnType(mi.desc).getSort)) + } + + case _ => + } + + def removeFromCallGraph(insn: AbstractInsnNode): Unit = insn match { + case mi: MethodInsnNode => callGraph.removeCallsite(mi, method) + case _ => + } + + for ((oldOp, newOps) <- toReplace) { + for (newOp <- newOps) method.instructions.insertBefore(oldOp, newOp) + method.instructions.remove(oldOp) + removeFromCallGraph(oldOp) + } + + toReplace.nonEmpty + } + } -object LocalOptImpls { /** * Removes unreachable basic blocks. * - * TODO: rewrite, don't use computeMaxLocalsMaxStack (runs a ClassWriter) / Analyzer. Too slow. - * * @return A set containing eliminated instructions, and a set containing all live label nodes. */ - def removeUnreachableCodeImpl(method: MethodNode, ownerClassName: InternalName): (Set[AbstractInsnNode], Set[LabelNode]) = { - // The data flow analysis requires the maxLocals / maxStack fields of the method to be computed. - computeMaxLocalsMaxStack(method) - val a = new Analyzer(new BasicInterpreter) - a.analyze(ownerClassName, method) - val frames = a.getFrames + def removeUnreachableCodeImpl(method: MethodNode, ownerClassName: InternalName): (Boolean, Set[LabelNode]) = { + val a = new AsmAnalyzer(method, ownerClassName) + val frames = a.analyzer.getFrames - val initialSize = method.instructions.size var i = 0 var liveLabels = Set.empty[LabelNode] - var removedInstructions = Set.empty[AbstractInsnNode] + var changed = false + var maxLocals = parametersSize(method) + var maxStack = 0 val itr = method.instructions.iterator() while (itr.hasNext) { - itr.next() match { - case l: LabelNode => - if (frames(i) != null) liveLabels += l + val insn = itr.next() + val isLive = frames(i) != null + if (isLive) maxStack = math.max(maxStack, frames(i).getStackSize) - case ins => + insn match { + case l: LabelNode => // label nodes are not removed: they might be referenced for example in a LocalVariableNode - if (frames(i) == null || ins.getOpcode == Opcodes.NOP) { + if (isLive) liveLabels += l + + case v: VarInsnNode if isLive => + val longSize = if (isSize2LoadOrStore(v.getOpcode)) 1 else 0 + maxLocals = math.max(maxLocals, v.`var` + longSize + 1) // + 1 because local numbers are 0-based + + case i: IincInsnNode if isLive => + maxLocals = math.max(maxLocals, i.`var` + 1) + + case _ => + if (!isLive || insn.getOpcode == NOP) { // Instruction iterators allow removing during iteration. // Removing is O(1): instructions are doubly linked list elements. itr.remove() - removedInstructions += ins + changed = true + insn match { + case invocation: MethodInsnNode => callGraph.removeCallsite(invocation, method) + case indy: InvokeDynamicInsnNode => callGraph.removeClosureInstantiation(indy, method) + case _ => + } } } i += 1 } - (removedInstructions, liveLabels) + method.maxLocals = maxLocals + method.maxStack = maxStack + (changed, liveLabels) } /** + * Eliminate `CHECKCAST` instructions that are statically known to succeed. This is safe if the + * tested object is null: `null.asInstanceOf` always succeeds. + * + * The type of the tested object is determined using a NonLubbingTypeFlowAnalyzer. Note that this + * analysis collapses LUBs of non-equal references types to Object for simplicity. Example: + * given `B <: A <: Object`, the cast in `(if (..) new B else new A).asInstanceOf[A]` would not + * be eliminated. + * + * Note: we cannot replace `INSTANCEOF` tests by only looking at the types, `null.isInstanceOf` + * always returns false, so we'd also need nullness information. + */ + def eliminateRedundantCasts(method: MethodNode, owner: InternalName): Boolean = { + AsmAnalyzer.sizeOKForBasicValue(method) && { + def isSubType(aRefDesc: String, bClass: InternalName): Boolean = aRefDesc == bClass || bClass == ObjectRef.internalName || { + (bTypeForDescriptorOrInternalNameFromClassfile(aRefDesc) conformsTo classBTypeFromParsedClassfile(bClass)).getOrElse(false) + } + + lazy val typeAnalyzer = new NonLubbingTypeFlowAnalyzer(method, owner) + + // cannot remove instructions while iterating, it gets the analysis out of synch (indexed by instructions) + val toRemove = mutable.Set.empty[TypeInsnNode] + + val it = method.instructions.iterator() + while (it.hasNext) it.next() match { + case ti: TypeInsnNode if ti.getOpcode == CHECKCAST => + val frame = typeAnalyzer.frameAt(ti) + val valueTp = frame.getValue(frame.stackTop) + if (valueTp.isReference && isSubType(valueTp.getType.getDescriptor, ti.desc)) { + toRemove += ti + } + + case _ => + } + + toRemove foreach method.instructions.remove + toRemove.nonEmpty + } + } +} + +object LocalOptImpls { + /** * Remove exception handlers that cover empty code blocks. A block is considered empty if it * consist only of labels, frames, line numbers, nops and gotos. * @@ -235,16 +588,16 @@ object LocalOptImpls { def removeEmptyExceptionHandlers(method: MethodNode): Set[TryCatchBlockNode] = { /** True if there exists code between start and end. */ def containsExecutableCode(start: AbstractInsnNode, end: LabelNode): Boolean = { - start != end && ((start.getOpcode : @switch) match { + start != end && ((start.getOpcode: @switch) match { // FrameNode, LabelNode and LineNumberNode have opcode == -1. - case -1 | Opcodes.GOTO => containsExecutableCode(start.getNext, end) + case -1 | GOTO => containsExecutableCode(start.getNext, end) case _ => true }) } var removedHandlers = Set.empty[TryCatchBlockNode] val handlersIter = method.tryCatchBlocks.iterator() - while(handlersIter.hasNext) { + while (handlersIter.hasNext) { val handler = handlersIter.next() if (!containsExecutableCode(handler.start, handler.end)) { removedHandlers += handler @@ -263,9 +616,10 @@ object LocalOptImpls { * same type or name. */ def removeUnusedLocalVariableNodes(method: MethodNode)(firstLocalIndex: Int = parametersSize(method), renumber: Int => Int = identity): Boolean = { - def variableIsUsed(start: AbstractInsnNode, end: LabelNode, varIndex: Int): Boolean = { + @tailrec def variableIsUsed(start: AbstractInsnNode, end: LabelNode, varIndex: Int): Boolean = { start != end && (start match { case v: VarInsnNode if v.`var` == varIndex => true + case i: IincInsnNode if i.`var` == varIndex => true case _ => variableIsUsed(start.getNext, end, varIndex) }) } @@ -285,17 +639,6 @@ object LocalOptImpls { } /** - * The number of local variable slots used for parameters and for the `this` reference. - */ - private def parametersSize(method: MethodNode): Int = { - // Double / long fields occupy two slots, so we sum up the sizes. Since getSize returns 0 for - // void, we have to add `max 1`. - val paramsSize = scala.tools.asm.Type.getArgumentTypes(method.desc).iterator.map(_.getSize max 1).sum - val thisSize = if ((method.access & Opcodes.ACC_STATIC) == 0) 1 else 0 - paramsSize + thisSize - } - - /** * Compact the local variable slots used in the method's implementation. This prevents having * unused slots for example after eliminating unreachable code. * @@ -310,12 +653,9 @@ object LocalOptImpls { val renumber = collection.mutable.ArrayBuffer.empty[Int] // Add the index of the local variable used by `varIns` to the `renumber` array. - def addVar(varIns: VarInsnNode): Unit = { - val index = varIns.`var` - val isWide = (varIns.getOpcode: @switch) match { - case Opcodes.LLOAD | Opcodes.DLOAD | Opcodes.LSTORE | Opcodes.DSTORE => true - case _ => false - } + def addVar(varIns: AbstractInsnNode, slot: Int): Unit = { + val index = slot + val isWide = isSize2LoadOrStore(varIns.getOpcode) // Ensure the length of `renumber`. Unused variable indices are mapped to -1. val minLength = if (isWide) index + 2 else index + 1 @@ -332,7 +672,7 @@ object LocalOptImpls { val firstLocalIndex = parametersSize(method) for (i <- 0 until firstLocalIndex) renumber += i // parameters and `this` are always used. method.instructions.iterator().asScala foreach { - case VarInstruction(varIns) => addVar(varIns) + case VarInstruction(varIns, slot) => addVar(varIns, slot) case _ => } @@ -353,10 +693,12 @@ object LocalOptImpls { // update variable instructions according to the renumber table method.maxLocals = nextIndex method.instructions.iterator().asScala.foreach { - case VarInstruction(varIns) => - val oldIndex = varIns.`var` - if (oldIndex >= firstLocalIndex && renumber(oldIndex) != oldIndex) - varIns.`var` = renumber(varIns.`var`) + case VarInstruction(varIns, slot) => + val oldIndex = slot + if (oldIndex >= firstLocalIndex && renumber(oldIndex) != oldIndex) varIns match { + case vi: VarInsnNode => vi.`var` = renumber(slot) + case ii: IincInsnNode => ii.`var` = renumber(slot) + } case _ => } true @@ -431,154 +773,181 @@ object LocalOptImpls { // A set of all exception handlers that guard the current instruction, required for simplifyGotoReturn var activeHandlers = Set.empty[TryCatchBlockNode] - // Instructions that need to be removed. simplifyBranchOverGoto returns an instruction to be - // removed. It cannot remove it itself because the instruction may be the successor of the current - // instruction of the iterator, which is not supported in ASM. - var instructionsToRemove = Set.empty[AbstractInsnNode] + val jumpInsns = mutable.LinkedHashMap.empty[JumpInsnNode, Boolean] - val iterator = method.instructions.iterator() - while (iterator.hasNext) { - val instruction = iterator.next() + for (insn <- method.instructions.iterator().asScala) insn match { + case l: LabelNode => + activeHandlers ++= allHandlers.filter(_.start == l) + activeHandlers = activeHandlers.filter(_.end != l) - instruction match { - case l: LabelNode => - activeHandlers ++= allHandlers.filter(_.start == l) - activeHandlers = activeHandlers.filter(_.end != l) - case _ => + case ji: JumpInsnNode => + jumpInsns(ji) = activeHandlers.nonEmpty + + case _ => + } + + var _jumpTargets: Set[AbstractInsnNode] = null + def jumpTargets = { + if (_jumpTargets == null) { + _jumpTargets = jumpInsns.keysIterator.map(_.label).toSet } + _jumpTargets + } - if (instructionsToRemove(instruction)) { - iterator.remove() - instructionsToRemove -= instruction - } else if (isJumpNonJsr(instruction)) { // fast path - all of the below only treat jumps - var jumpRemoved = simplifyThenElseSameTarget(method, instruction) + def removeJumpFromMap(jump: JumpInsnNode) = { + jumpInsns.remove(jump) + _jumpTargets = null + } - if (!jumpRemoved) { - changed = collapseJumpChains(instruction) || changed - jumpRemoved = removeJumpToSuccessor(method, instruction) + def replaceJumpByPop(jump: JumpInsnNode) = { + removeJumpAndAdjustStack(method, jump) + removeJumpFromMap(jump) + } - if (!jumpRemoved) { - val staleGoto = simplifyBranchOverGoto(method, instruction) - instructionsToRemove ++= staleGoto - changed ||= staleGoto.nonEmpty - changed = simplifyGotoReturn(method, instruction, inTryBlock = activeHandlers.nonEmpty) || changed - } + /** + * Removes a conditional jump if it is followed by a GOTO to the same destination. + * + * CondJump l; [nops]; GOTO l; [...] + * POP*; [nops]; GOTO l; [...] + * + * Introduces 1 or 2 POP instructions, depending on the number of values consumed by the CondJump. + */ + def simplifyThenElseSameTarget(insn: AbstractInsnNode): Boolean = insn match { + case ConditionalJump(jump) => + nextExecutableInstruction(insn) match { + case Some(Goto(elseJump)) if sameTargetExecutableInstruction(jump, elseJump) => + replaceJumpByPop(jump) + true + + case _ => false } - changed ||= jumpRemoved - } + + case _ => false } - assert(instructionsToRemove.isEmpty, "some optimization required removing a previously traversed instruction. add `instructionsToRemove.foreach(method.instructions.remove)`") - changed - } - /** - * Removes a conditional jump if it is followed by a GOTO to the same destination. - * - * CondJump l; [nops]; GOTO l; [...] - * POP*; [nops]; GOTO l; [...] - * - * Introduces 1 or 2 POP instructions, depending on the number of values consumed by the CondJump. - */ - private def simplifyThenElseSameTarget(method: MethodNode, instruction: AbstractInsnNode): Boolean = instruction match { - case ConditionalJump(jump) => - nextExecutableInstruction(instruction) match { - case Some(Goto(elseJump)) if sameTargetExecutableInstruction(jump, elseJump) => - removeJumpAndAdjustStack(method, jump) + /** + * Replace jumps to a sequence of GOTO instructions by a jump to the final destination. + * + * {{{ + * Jump l; [any ops]; l: GOTO m; [any ops]; m: GOTO n; [any ops]; n: NotGOTO; [...] + * => Jump n; [rest unchanged] + * }}} + * + * If there's a loop of GOTOs, the initial jump is replaced by one of the labels in the loop. + */ + def collapseJumpChains(insn: AbstractInsnNode): Boolean = insn match { + case JumpNonJsr(jump) => + val target = finalJumpTarget(jump) + if (jump.label == target) false else { + jump.label = target + _jumpTargets = null true + } - case _ => false - } - case _ => false - } + case _ => false + } - /** - * Replace jumps to a sequence of GOTO instructions by a jump to the final destination. - * - * Jump l; [any ops]; l: GOTO m; [any ops]; m: GOTO n; [any ops]; n: NotGOTO; [...] - * => Jump n; [rest unchanged] - * - * If there's a loop of GOTOs, the initial jump is replaced by one of the labels in the loop. - */ - private def collapseJumpChains(instruction: AbstractInsnNode): Boolean = instruction match { - case JumpNonJsr(jump) => - val target = finalJumpTarget(jump) - if (jump.label == target) false else { - jump.label = target + /** + * Eliminates unnecessary jump instructions + * + * {{{ + * Jump l; [nops]; l: [...] + * => POP*; [nops]; l: [...] + * }}} + * + * Introduces 0, 1 or 2 POP instructions, depending on the number of values consumed by the Jump. + */ + def removeJumpToSuccessor(insn: AbstractInsnNode): Boolean = insn match { + case JumpNonJsr(jump) if nextExecutableInstruction(jump, alsoKeep = Set(jump.label)) contains jump.label => + replaceJumpByPop(jump) true - } - case _ => false - } + case _ => false + } - /** - * Eliminates unnecessary jump instructions - * - * Jump l; [nops]; l: [...] - * => POP*; [nops]; l: [...] - * - * Introduces 0, 1 or 2 POP instructions, depending on the number of values consumed by the Jump. - */ - private def removeJumpToSuccessor(method: MethodNode, instruction: AbstractInsnNode) = instruction match { - case JumpNonJsr(jump) if nextExecutableInstruction(jump, alsoKeep = Set(jump.label)) == Some(jump.label) => - removeJumpAndAdjustStack(method, jump) - true - case _ => false - } + /** + * If the "else" part of a conditional branch is a simple GOTO, negates the conditional branch + * and eliminates the GOTO. + * + * {{{ + * CondJump l; [nops, no jump targets]; GOTO m; [nops]; l: [...] + * => NegatedCondJump m; [nops, no jump targets]; [nops]; l: [...] + * }}} + * + * Note that no jump targets are allowed in the first [nops] section. Otherwise, there could + * be some other jump to the GOTO, and eliminating it would change behavior. + */ + def simplifyBranchOverGoto(insn: AbstractInsnNode, inTryBlock: Boolean): Boolean = insn match { + case ConditionalJump(jump) => + // don't skip over jump targets, see doc comment + nextExecutableInstruction(jump, alsoKeep = jumpTargets) match { + case Some(Goto(goto)) => + if (nextExecutableInstruction(goto, alsoKeep = Set(jump.label)) contains jump.label) { + val newJump = new JumpInsnNode(negateJumpOpcode(jump.getOpcode), goto.label) + method.instructions.set(jump, newJump) + removeJumpFromMap(jump) + jumpInsns(newJump) = inTryBlock + replaceJumpByPop(goto) + true + } else false + + case _ => false + } + case _ => false + } - /** - * If the "else" part of a conditional branch is a simple GOTO, negates the conditional branch - * and eliminates the GOTO. - * - * CondJump l; [nops, no labels]; GOTO m; [nops]; l: [...] - * => NegatedCondJump m; [nops, no labels]; [nops]; l: [...] - * - * Note that no label definitions are allowed in the first [nops] section. Otherwise, there could - * be some other jump to the GOTO, and eliminating it would change behavior. - * - * For technical reasons, we cannot remove the GOTO here (*).Instead this method returns an Option - * containing the GOTO that needs to be eliminated. - * - * (*) The ASM instruction iterator (used in the caller [[simplifyJumps]]) has an undefined - * behavior if the successor of the current instruction is removed, which may be the case here - */ - private def simplifyBranchOverGoto(method: MethodNode, instruction: AbstractInsnNode): Option[JumpInsnNode] = instruction match { - case ConditionalJump(jump) => - // don't skip over labels, see doc comment - nextExecutableInstruction(jump, alsoKeep = _.isInstanceOf[LabelNode]) match { - case Some(Goto(goto)) => - if (nextExecutableInstruction(goto, alsoKeep = Set(jump.label)) == Some(jump.label)) { - val newJump = new JumpInsnNode(negateJumpOpcode(jump.getOpcode), goto.label) - method.instructions.set(jump, newJump) - Some(goto) - } else None - - case _ => None - } - case _ => None - } + /** + * Inlines xRETURN and ATHROW + * + * {{{ + * GOTO l; [any ops]; l: xRETURN/ATHROW + * => xRETURN/ATHROW; [any ops]; l: xRETURN/ATHROW + * }}} + * + * inlining is only done if the GOTO instruction is not part of a try block, otherwise the + * rewrite might change the behavior. For xRETURN, the reason is that return instructions may throw + * an IllegalMonitorStateException, as described here: + * http://docs.oracle.com/javase/specs/jvms/se8/html/jvms-6.html#jvms-6.5.return + */ + def simplifyGotoReturn(instruction: AbstractInsnNode, inTryBlock: Boolean): Boolean = !inTryBlock && (instruction match { + case Goto(jump) => + nextExecutableInstruction(jump.label) match { + case Some(target) => + if (isReturn(target) || target.getOpcode == ATHROW) { + method.instructions.set(jump, target.clone(null)) + removeJumpFromMap(jump) + true + } else false + + case _ => false + } + case _ => false + }) - /** - * Inlines xRETURN and ATHROW - * - * GOTO l; [any ops]; l: xRETURN/ATHROW - * => xRETURN/ATHROW; [any ops]; l: xRETURN/ATHROW - * - * inlining is only done if the GOTO instruction is not part of a try block, otherwise the - * rewrite might change the behavior. For xRETURN, the reason is that return instructions may throw - * an IllegalMonitorStateException, as described here: - * http://docs.oracle.com/javase/specs/jvms/se8/html/jvms-6.html#jvms-6.5.return - */ - private def simplifyGotoReturn(method: MethodNode, instruction: AbstractInsnNode, inTryBlock: Boolean): Boolean = !inTryBlock && (instruction match { - case Goto(jump) => - nextExecutableInstruction(jump.label) match { - case Some(target) => - if (isReturn(target) || target.getOpcode == Opcodes.ATHROW) { - method.instructions.set(jump, target.clone(null)) - true - } else false + def run(): Boolean = { + var changed = false + + // `.toList` because we're modifying the map while iterating over it + for ((jumpInsn, inTryBlock) <- jumpInsns.toList if jumpInsns.contains(jumpInsn) && isJumpNonJsr(jumpInsn)) { + var jumpRemoved = simplifyThenElseSameTarget(jumpInsn) + + if (!jumpRemoved) { + changed = collapseJumpChains(jumpInsn) || changed + jumpRemoved = removeJumpToSuccessor(jumpInsn) + + if (!jumpRemoved) { + changed = simplifyBranchOverGoto(jumpInsn, inTryBlock) || changed + changed = simplifyGotoReturn(jumpInsn, inTryBlock) || changed + } + } - case _ => false + changed ||= jumpRemoved } - case _ => false - }) + + if (changed) run() + changed + } + + run() + } } |