diff options
Diffstat (limited to 'src')
24 files changed, 1969 insertions, 727 deletions
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/AsmUtils.scala b/src/compiler/scala/tools/nsc/backend/jvm/AsmUtils.scala index cd7e0b83e8..18a495e5fd 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/AsmUtils.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/AsmUtils.scala @@ -55,6 +55,13 @@ object AsmUtils { node } + def readClass(filename: String): ClassNode = { + val f = new java.io.RandomAccessFile(filename, "r") + val b = new Array[Byte](f.length.toInt) + f.read(b) + readClass(b) + } + /** * Returns a human-readable representation of the cnode ClassNode. */ diff --git a/src/compiler/scala/tools/nsc/backend/jvm/BCodeAsmCommon.scala b/src/compiler/scala/tools/nsc/backend/jvm/BCodeAsmCommon.scala index 93f5159f89..42738d3e1c 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/BCodeAsmCommon.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/BCodeAsmCommon.scala @@ -390,6 +390,17 @@ final class BCodeAsmCommon[G <: Global](val global: G) { val isEffectivelyFinal = classSym.isEffectivelyFinal + val sam = { + if (classSym.isImplClass || classSym.isEffectivelyFinal) None + else { + // Phase travel necessary. For example, nullary methods (getter of an abstract val) get an + // empty parameter list in later phases and would therefore be picked as SAM. + val samSym = exitingPickler(definitions.findSam(classSym.tpe)) + if (samSym == NoSymbol) None + else Some(samSym.javaSimpleName.toString + methodSymToDescriptor(samSym)) + } + } + var warning = Option.empty[ClassSymbolInfoFailureSI9111] // Primitive methods cannot be inlined, so there's no point in building a MethodInlineInfo. 
Also, some @@ -447,7 +458,7 @@ final class BCodeAsmCommon[G <: Global](val global: G) { } }).toMap - InlineInfo(traitSelfType, isEffectivelyFinal, methodInlineInfos, warning) + InlineInfo(traitSelfType, isEffectivelyFinal, sam, methodInlineInfos, warning) } } diff --git a/src/compiler/scala/tools/nsc/backend/jvm/BCodeSkelBuilder.scala b/src/compiler/scala/tools/nsc/backend/jvm/BCodeSkelBuilder.scala index a9b6a312e9..075a44ca22 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/BCodeSkelBuilder.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/BCodeSkelBuilder.scala @@ -9,7 +9,6 @@ package backend package jvm import scala.collection.{ mutable, immutable } -import scala.tools.nsc.backend.jvm.opt.ByteCodeRepository import scala.tools.nsc.symtab._ import scala.tools.asm @@ -140,11 +139,6 @@ abstract class BCodeSkelBuilder extends BCodeHelpers { if (AsmUtils.traceClassEnabled && cnode.name.contains(AsmUtils.traceClassPattern)) AsmUtils.traceClass(cnode) - if (settings.YoptAddToBytecodeRepository) { - // The inliner needs to find all classes in the code repo, also those being compiled - byteCodeRepository.add(cnode, ByteCodeRepository.CompilationUnit) - } - assert(cd.symbol == claszSymbol, "Someone messed up BCodePhase.claszSymbol during genPlainClass().") } // end of method genPlainClass() diff --git a/src/compiler/scala/tools/nsc/backend/jvm/BTypes.scala b/src/compiler/scala/tools/nsc/backend/jvm/BTypes.scala index 0c26e01322..aff2d2d8c9 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/BTypes.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/BTypes.scala @@ -11,9 +11,10 @@ import scala.collection.concurrent.TrieMap import scala.reflect.internal.util.Position import scala.tools.asm import asm.Opcodes -import scala.tools.asm.tree.{MethodNode, MethodInsnNode, InnerClassNode, ClassNode} +import scala.tools.asm.tree._ import scala.tools.nsc.backend.jvm.BTypes.{InlineInfo, MethodInlineInfo} import scala.tools.nsc.backend.jvm.BackendReporting._ +import 
scala.tools.nsc.backend.jvm.analysis.Analyzers import scala.tools.nsc.backend.jvm.opt._ import scala.collection.convert.decorateAsScala._ import scala.tools.nsc.settings.ScalaSettings @@ -38,16 +39,20 @@ abstract class BTypes { /** * Tools for parsing classfiles, used by the inliner. */ - val byteCodeRepository: ByteCodeRepository + val byteCodeRepository: ByteCodeRepository[this.type] val localOpt: LocalOpt[this.type] val inliner: Inliner[this.type] + val inlinerHeuristics: InlinerHeuristics[this.type] + val closureOptimizer: ClosureOptimizer[this.type] val callGraph: CallGraph[this.type] + val analyzers: Analyzers[this.type] + val backendReporting: BackendReporting // Allows to define per-run caches here and in the CallGraph component, which don't have a global @@ -56,7 +61,6 @@ abstract class BTypes { // Allows access to the compiler settings for backend components that don't have a global in scope def compilerSettings: ScalaSettings - /** * A map from internal names to ClassBTypes. Every ClassBType is added to this map on its * construction. @@ -95,6 +99,13 @@ abstract class BTypes { val unreachableCodeEliminated: collection.mutable.Set[MethodNode] = recordPerRunCache(collection.mutable.Set.empty) /** + * Cache of methods which have correct `maxLocals` / `maxStack` values assigned. This allows + * invoking `computeMaxLocalsMaxStack` whenever running an analyzer but performing the actual + * computation only when necessary. + */ + val maxLocalsMaxStackComputed: collection.mutable.Set[MethodNode] = recordPerRunCache(collection.mutable.Set.empty) + + /** * Obtain the BType for a type descriptor or internal name. For class descriptors, the ClassBType * is constructed by parsing the corresponding classfile. 
* @@ -234,6 +245,7 @@ abstract class BTypes { InlineInfo( traitImplClassSelfType = None, isEffectivelyFinal = BytecodeUtils.isFinalClass(classNode), + sam = inlinerHeuristics.javaSam(classNode.name), methodInfos = methodInfos, warning) } @@ -554,6 +566,8 @@ abstract class BTypes { * Terminology * ----------- * + * Diagram here: https://blogs.oracle.com/darcy/entry/nested_inner_member_and_top + * * - Nested class (JLS 8): class whose declaration occurs within the body of another class * * - Top-level class (JLS 8): non-nested class @@ -1104,6 +1118,8 @@ object BTypes { * Metadata about a ClassBType, used by the inliner. * * More information may be added in the future to enable more elaborate inlinine heuristics. + * Note that this class should contain information that can only be obtained from the ClassSymbol. + * Information that can be computed from the ClassNode should be added to the call graph instead. * * @param traitImplClassSelfType `Some(tp)` if this InlineInfo describes a trait, and the `self` * parameter type of the methods in the implementation class is not @@ -1122,6 +1138,8 @@ object BTypes { * @param isEffectivelyFinal True if the class cannot have subclasses: final classes, module * classes, trait impl classes. * + * @param sam If this class is a SAM type, the SAM's "$name$descriptor". + * * @param methodInfos The [[MethodInlineInfo]]s for the methods declared in this class. * The map is indexed by the string s"$name$descriptor" (to * disambiguate overloads). @@ -1132,10 +1150,11 @@ object BTypes { */ final case class InlineInfo(traitImplClassSelfType: Option[InternalName], isEffectivelyFinal: Boolean, + sam: Option[String], methodInfos: Map[String, MethodInlineInfo], warning: Option[ClassInlineInfoWarning]) - val EmptyInlineInfo = InlineInfo(None, false, Map.empty, None) + val EmptyInlineInfo = InlineInfo(None, false, None, Map.empty, None) /** * Metadata about a method, used by the inliner. 
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/BTypesFromSymbols.scala b/src/compiler/scala/tools/nsc/backend/jvm/BTypesFromSymbols.scala index 45d9cc3ff3..8cccc50c69 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/BTypesFromSymbols.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/BTypesFromSymbols.scala @@ -7,8 +7,9 @@ package scala.tools.nsc package backend.jvm import scala.tools.asm +import scala.tools.nsc.backend.jvm.analysis.Analyzers import scala.tools.nsc.backend.jvm.opt._ -import scala.tools.nsc.backend.jvm.BTypes.{InlineInfo, MethodInlineInfo, InternalName} +import scala.tools.nsc.backend.jvm.BTypes._ import BackendReporting._ import scala.tools.nsc.settings.ScalaSettings @@ -36,16 +37,20 @@ class BTypesFromSymbols[G <: Global](val global: G) extends BTypes { val coreBTypes = new CoreBTypesProxy[this.type](this) import coreBTypes._ - val byteCodeRepository = new ByteCodeRepository(global.classPath, javaDefinedClasses, recordPerRunCache(collection.concurrent.TrieMap.empty)) + val byteCodeRepository: ByteCodeRepository[this.type] = new ByteCodeRepository(global.classPath, this) val localOpt: LocalOpt[this.type] = new LocalOpt(this) val inliner: Inliner[this.type] = new Inliner(this) + val inlinerHeuristics: InlinerHeuristics[this.type] = new InlinerHeuristics(this) + val closureOptimizer: ClosureOptimizer[this.type] = new ClosureOptimizer(this) val callGraph: CallGraph[this.type] = new CallGraph(this) + val analyzers: Analyzers[this.type] = new Analyzers(this) + val backendReporting: BackendReporting = new BackendReportingImpl(global) final def initializeCoreBTypes(): Unit = { @@ -444,7 +449,7 @@ class BTypesFromSymbols[G <: Global](val global: G) extends BTypes { case Right(classNode) => inlineInfoFromClassfile(classNode) case Left(missingClass) => - InlineInfo(None, false, Map.empty, Some(ClassNotFoundWhenBuildingInlineInfoFromSymbol(missingClass))) + EmptyInlineInfo.copy(warning = 
Some(ClassNotFoundWhenBuildingInlineInfoFromSymbol(missingClass))) } } } @@ -467,7 +472,7 @@ class BTypesFromSymbols[G <: Global](val global: G) extends BTypes { flags = asm.Opcodes.ACC_SUPER | asm.Opcodes.ACC_PUBLIC | asm.Opcodes.ACC_FINAL, nestedClasses = nested, nestedInfo = None, - InlineInfo(None, true, Map.empty, None))) // no InlineInfo needed, scala never invokes methods on the mirror class + inlineInfo = EmptyInlineInfo.copy(isEffectivelyFinal = true))) // no method inline infos needed, scala never invokes methods on the mirror class c }) } diff --git a/src/compiler/scala/tools/nsc/backend/jvm/BackendReporting.scala b/src/compiler/scala/tools/nsc/backend/jvm/BackendReporting.scala index b41d0de92f..005d01f187 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/BackendReporting.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/BackendReporting.scala @@ -1,7 +1,7 @@ package scala.tools.nsc package backend.jvm -import scala.tools.asm.tree.{InvokeDynamicInsnNode, AbstractInsnNode, MethodNode} +import scala.tools.asm.tree.{AbstractInsnNode, MethodNode} import scala.tools.nsc.backend.jvm.BTypes.InternalName import scala.reflect.internal.util.Position import scala.tools.nsc.settings.ScalaSettings @@ -228,7 +228,7 @@ object BackendReporting { def emitWarning(settings: ScalaSettings): Boolean = this match { case _: IllegalAccessInstruction | _: MethodWithHandlerCalledOnNonEmptyStack | _: SynchronizedMethod | _: StrictfpMismatch | _: ResultingMethodTooLarge => - settings.YoptWarningEmitAtInlineFailed + settings.YoptWarnings.contains(settings.YoptWarningsChoices.anyInlineFailed) case IllegalAccessCheckFailed(_, _, _, _, _, cause) => cause.emitWarning(settings) @@ -246,9 +246,11 @@ object BackendReporting { case class ResultingMethodTooLarge(calleeDeclarationClass: InternalName, name: String, descriptor: String, callsiteClass: InternalName, callsiteName: String, callsiteDesc: String) extends CannotInlineWarning + // TODO: this should be a subtype of 
CannotInlineWarning + // but at the place where it's created (in findIllegalAccess) we don't have the necessary data (calleeName, calleeDescriptor). case object UnknownInvokeDynamicInstruction extends OptimizerWarning { override def toString = "The callee contains an InvokeDynamic instruction with an unknown bootstrap method (not a LambdaMetaFactory)." - def emitWarning(settings: ScalaSettings): Boolean = settings.YoptWarningEmitAtInlineFailed + def emitWarning(settings: ScalaSettings): Boolean = settings.YoptWarnings.contains(settings.YoptWarningsChoices.anyInlineFailed) } /** @@ -260,7 +262,7 @@ object BackendReporting { override def emitWarning(settings: ScalaSettings): Boolean = this match { case RewriteClosureAccessCheckFailed(_, cause) => cause.emitWarning(settings) - case RewriteClosureIllegalAccess(_, _) => settings.YoptWarningEmitAtInlineFailed + case RewriteClosureIllegalAccess(_, _) => settings.YoptWarnings.contains(settings.YoptWarningsChoices.anyInlineFailed) } override def toString: String = this match { diff --git a/src/compiler/scala/tools/nsc/backend/jvm/BackendStats.scala b/src/compiler/scala/tools/nsc/backend/jvm/BackendStats.scala index 03306f30aa..8d0547b607 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/BackendStats.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/BackendStats.scala @@ -8,6 +8,7 @@ package backend.jvm import scala.reflect.internal.util.Statistics +// Enable with `-Ystatistics:jvm` object BackendStats { import Statistics.{newTimer, newSubTimer} val bcodeTimer = newTimer("time in backend", "jvm") diff --git a/src/compiler/scala/tools/nsc/backend/jvm/GenBCode.scala b/src/compiler/scala/tools/nsc/backend/jvm/GenBCode.scala index 00b4b8b667..e1a724f1cb 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/GenBCode.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/GenBCode.scala @@ -14,6 +14,7 @@ import scala.reflect.internal.util.Statistics import scala.tools.asm import scala.tools.asm.tree.ClassNode +import 
scala.tools.nsc.backend.jvm.opt.ByteCodeRepository /* * Prepare in-memory representations of classfiles using the ASM Tree API, and serialize them to disk. @@ -186,7 +187,7 @@ abstract class GenBCode extends BCodeSyncAndTry { // -------------- "plain" class -------------- val pcb = new PlainClassBuilder(cunit) pcb.genPlainClass(cd) - val outF = if (needsOutFolder) getOutFolder(claszSymbol, pcb.thisName, cunit) else null; + val outF = if (needsOutFolder) getOutFolder(claszSymbol, pcb.thisName, cunit) else null val plainC = pcb.cnode // -------------- bean info class, if needed -------------- @@ -221,12 +222,18 @@ abstract class GenBCode extends BCodeSyncAndTry { class Worker2 { def runGlobalOptimizations(): Unit = { import scala.collection.convert.decorateAsScala._ - if (settings.YoptBuildCallGraph) { - q2.asScala foreach { - case Item2(_, _, plain, _, _) => - // skip mirror / bean: wd don't inline into tem, and they are not used in the plain class - if (plain != null) callGraph.addClass(plain) - } + + // add classes to the bytecode repo before building the call graph: the latter needs to + // look up classes and methods in the code repo. 
+ if (settings.YoptAddToBytecodeRepository) q2.asScala foreach { + case Item2(_, mirror, plain, bean, _) => + if (mirror != null) byteCodeRepository.add(mirror, ByteCodeRepository.CompilationUnit) + if (plain != null) byteCodeRepository.add(plain, ByteCodeRepository.CompilationUnit) + if (bean != null) byteCodeRepository.add(bean, ByteCodeRepository.CompilationUnit) + } + if (settings.YoptBuildCallGraph) q2.asScala foreach { item => + // skip call graph for mirror / bean: wd don't inline into tem, and they are not used in the plain class + if (item.plain != null) callGraph.addClass(item.plain) } if (settings.YoptInlinerEnabled) bTypes.inliner.runInliner() diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala index 7bbe1e2a49..9e5fbfcc0e 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala @@ -3,17 +3,22 @@ package backend.jvm package analysis import scala.annotation.switch -import scala.collection.{mutable, immutable} +import scala.collection.mutable import scala.tools.asm.Opcodes import scala.tools.asm.tree._ import scala.tools.asm.tree.analysis.{Analyzer, Value, Frame, Interpreter} import opt.BytecodeUtils._ +import AliasSet.SmallBitSet -object AliasingFrame { - private var _idCounter: Long = 0l - private def nextId = { _idCounter += 1; _idCounter } -} - +/** + * A subclass of Frame that tracks aliasing of values stored in local variables and on the stack. + * + * Note: an analysis tracking aliases is roughly 5x slower than a usual analysis (assuming a simple + * value domain with a fast merge function). For example, nullness analysis is roughly 5x slower + * than a BasicValue analysis. + * + * See the doc of package object `analysis` for some notes on the performance of alias analysis. 
+ */ class AliasingFrame[V <: Value](nLocals: Int, nStack: Int) extends Frame[V](nLocals, nStack) { import Opcodes._ @@ -24,50 +29,66 @@ class AliasingFrame[V <: Value](nLocals: Int, nStack: Int) extends Frame[V](nLoc } /** - * For each slot (entry in the `values` array of the frame), an id that uniquely represents - * the object stored in it. If two values have the same id, they are aliases of the same - * object. - */ - private val aliasIds: Array[Long] = Array.fill(nLocals + nStack)(AliasingFrame.nextId) - - /** - * The object alias id of for a value index. - */ - def aliasId(entry: Int) = aliasIds(entry) - - /** - * Returns the indices of the values array which are aliases of the object `id`. + * For every value the set of values that are aliases of it. + * + * Invariants: + * - If `aliases(i) == null` then i has no aliases. This is equivalent to having + * `aliases(i) == SingletonSet(i)`. + * - If `aliases(i) != null` then `aliases(i) contains i`. + * - If `aliases(i) contains j` then `aliases(i) eq aliases(j)`, i.e., they are references to the + * same (mutable) AliasSet. */ - def valuesWithAliasId(id: Long): Set[Int] = immutable.BitSet.empty ++ aliasIds.indices.iterator.filter(i => aliasId(i) == id) + val aliases: Array[AliasSet] = new Array[AliasSet](getLocals + getMaxStackSize) /** * The set of aliased values for a given entry in the `values` array. */ - def aliasesOf(entry: Int): Set[Int] = valuesWithAliasId(aliasIds(entry)) + def aliasesOf(entry: Int): AliasSet = { + if (aliases(entry) != null) aliases(entry) + else { + val init = new AliasSet(new AliasSet.SmallBitSet(entry, -1, -1, -1), 1) + aliases(entry) = init + init + } + } /** - * Define a new alias. For example, given - * var a = this // this, a have the same aliasId - * then an assignment + * Define a new alias. For example, an assignment * b = a - * will set the same the aliasId for `b`. + * adds b to the set of aliases of a. 
*/ private def newAlias(assignee: Int, source: Int): Unit = { - aliasIds(assignee) = aliasIds(source) + removeAlias(assignee) + val sourceAliases = aliasesOf(source) + sourceAliases += assignee + aliases(assignee) = sourceAliases } /** - * An assignment + * Remove an alias. For example, an assignment * a = someUnknownValue() - * sets a fresh alias id for `a`. - * A stack value is also removed from its alias set when being consumed. + * removes a from its former alias set. + * As another example, stack values are removed from their alias sets when being consumed. */ private def removeAlias(assignee: Int): Unit = { - aliasIds(assignee) = AliasingFrame.nextId + if (aliases(assignee) != null) { + aliases(assignee) -= assignee + aliases(assignee) = null + } + } + + /** + * Define the alias set for a given value. + */ + private def setAliasSet(assignee: Int, set: AliasSet): Unit = { + if (aliases(assignee) != null) { + aliases(assignee) -= assignee + } + aliases(assignee) = set } override def execute(insn: AbstractInsnNode, interpreter: Interpreter[V]): Unit = { - // Make the extendsion methods easier to use (otherwise we have to repeat `this`.stackTop) + // Make the extension methods easier to use (otherwise we have to repeat `this`.stackTop) def stackTop: Int = this.stackTop def peekStack(n: Int): V = this.peekStack(n) @@ -166,14 +187,34 @@ class AliasingFrame[V <: Value](nLocals: Int, nStack: Int) extends Frame[V](nLoc } case SWAP => + // could be written more elegantly with higher-order combinators, but thinking of performance val top = stackTop - val idTop = aliasIds(top) - aliasIds(top) = aliasIds(top - 1) - aliasIds(top - 1) = idTop + + def moveNextToTop(): Unit = { + val nextAliases = aliases(top - 1) + aliases(top) = nextAliases + nextAliases -= (top - 1) + nextAliases += top + } + + if (aliases(top) != null) { + val topAliases = aliases(top) + if (aliases(top - 1) != null) moveNextToTop() + else aliases(top) = null + // move top to next + aliases(top - 1) = 
topAliases + topAliases -= top + topAliases += (top - 1) + } else { + if (aliases(top - 1) != null) { + moveNextToTop() + aliases(top - 1) = null + } + } case opcode => if (opcode == ASTORE) { - // Not a separate case because we need to remove the consumed stack value from alias sets after. + // not a separate case: we re-use the code below that removes the consumed stack value from alias sets val stackTopBefore = stackTop - produced + consumed val local = insn.asInstanceOf[VarInsnNode].`var` newAlias(assignee = local, source = stackTopBefore) @@ -198,10 +239,22 @@ class AliasingFrame[V <: Value](nLocals: Int, nStack: Int) extends Frame[V](nLoc val firstConsumed = stackTop - produced + 1 // firstConsumed = 3 for (i <- 0 until consumed) removeAlias(firstConsumed + i) // remove aliases for 3 and 4 + } + } - // We don't need to set the aliases ids for the produced values: the aliasIds array already - // contains fresh ids for non-used stack values (ensured by removeAlias). + /** + * When entering an exception handler, all values are dropped from the stack (and the exception + * value is pushed). The ASM analyzer invokes `firstHandlerInstructionFrame.clearStack()`. To + * ensure consistent aliasing sets, we need to remove the dropped values from aliasing sets. + */ + override def clearStack(): Unit = { + var i = getLocals + val end = i + getStackSize + while (i < end) { + removeAlias(i) + i += 1 } + super.clearStack() } /** @@ -217,30 +270,124 @@ class AliasingFrame[V <: Value](nLocals: Int, nStack: Int) extends Frame[V](nLoc * x = a * y = b // (x, a) and (y, b) * } - * [...] // (x, a) + * [...] // (x, a) -- merge of ((x, y, a)) and ((x, a), (y, b)) */ override def merge(other: Frame[_ <: V], interpreter: Interpreter[V]): Boolean = { + // merge is the main performance hot spot of a data flow analysis. + + // in nullness analysis, super.merge (which actually merges the nullness values) takes 20% of + // the overall analysis time. 
val valuesChanged = super.merge(other, interpreter) + + // in nullness analysis, merging the alias sets takes ~55% of the analysis time. therefore, this + // code has been heavily optimized. most of the time is spent in the `hasNext` method of the + // andNotIterator, see its comment. + var aliasesChanged = false val aliasingOther = other.asInstanceOf[AliasingFrame[_]] - for (i <- aliasIds.indices) { - val thisAliases = aliasesOf(i) - val thisNotOther = thisAliases diff (thisAliases intersect aliasingOther.aliasesOf(i)) - if (thisNotOther.nonEmpty) { - aliasesChanged = true - thisNotOther foreach removeAlias + + val numValues = getLocals + getStackSize + // assume (a, b) are aliases both in this frame, and the other frame. when merging the alias set + // for a, we already see that a and b will be aliases in the final result. so we can skip over + // merging the alias set for b. in this case, while merging the sets for a, knownOk(b) will be + // set to `true`. + val knownOk = new Array[Boolean](numValues) + var i = 0 + while (i < numValues) { + if (!knownOk(i)) { + val thisAliases = this.aliases(i) + val otherAliases = aliasingOther.aliases(i) + if (thisAliases != null && otherAliases != null) { + // The iterator yields elements that are in `thisAliases` but not in `otherAliases`. + // As a side-effect, for every index `i` that is in both alias sets, the iterator sets + // `knownOk(i) = true`: the alias sets for these values don't need to be merged again. 
+ val thisNotOtherIt = AliasSet.andNotIterator(thisAliases, otherAliases, knownOk) + if (thisNotOtherIt.hasNext) { + aliasesChanged = true + val newSet = AliasSet.empty + while (thisNotOtherIt.hasNext) { + val next = thisNotOtherIt.next() + newSet += next + setAliasSet(next, newSet) + } + } + } } + i += 1 } + valuesChanged || aliasesChanged } + private def min(s: SmallBitSet) = { + var r = s.a + if ( s.b < r) r = s.b + if (s.c != -1 && s.c < r) r = s.c + if (s.d != -1 && s.d < r) r = s.d + r + } + override def init(src: Frame[_ <: V]): Frame[V] = { - super.init(src) - compat.Platform.arraycopy(src.asInstanceOf[AliasingFrame[_]].aliasIds, 0, aliasIds, 0, aliasIds.length) + super.init(src) // very quick (just an arraycopy) + System.arraycopy(src.asInstanceOf[AliasingFrame[_]].aliases, 0, aliases, 0, aliases.length) // also quick + + val newSets = mutable.HashMap.empty[AliasSet, AliasSet] + + // the rest of this method (cloning alias sets) is the second performance˙hotspot (next to + // AliasingFrame.merge). for nullness, it takes ~20% of the analysis time. + // the difficulty here is that we have to clone the alias sets correctly. if two values a, b are + // aliases, then aliases(a) eq aliases(b). we need to make sure to use the same clone for the + // two values. + + var i = 0 + while (i < aliases.length) { + val set = aliases(i) + if (set != null) { + // size cannot be 0 - alias sets are always at least singletons. 
+ // for sets of size 1-4, don't use the `newSets` map - lookup / update is slow + if (set.size == 1) { + aliases(i) = null + } else if (set.size <= 4) { + val small = set.set.asInstanceOf[AliasSet.SmallBitSet] + val firstOfSet = i == min(small) + if (firstOfSet) { + val newSet = set.clone() + aliases(small.a) = newSet + aliases(small.b) = newSet + if (small.c != -1) aliases(small.c) = newSet + if (small.d != -1) aliases(small.d) = newSet + } + } else { + // the actual hot spot is the hash map operations here: this is where almost all of the 20% + // mentioned above is spent. + // i also benchmarked an alternative implementation: keep an array of booleans for indexes + // that already contain the cloned set. iterate through all elements of the cloned set and + // assign the cloned set. this approach is 50% slower than using a hash map. + if (newSets contains set) aliases(i) = newSets(set) + else { + val newSet = set.clone() + newSets(set) = newSet + aliases(i) = newSet + } + } + } + i += 1 + } this } } +object AliasingFrame { +// val start1 = AliasingFrame.timer1.start() +// AliasingFrame.timer1.stop(start1) + import scala.reflect.internal.util.Statistics._ + val timer1 = newTimer("t1", "jvm") + val timer2 = newTimer("t2", "jvm") + val timer3 = newTimer("t3", "jvm") + val timers = List(timer1, timer2, timer3) + def reset(): Unit = for (t <- timers) { t.nanos = 0; t.timings = 0 } +} + /** * An analyzer that uses AliasingFrames instead of bare Frames. This can be used when an analysis * needs to track aliases, but doesn't require a more specific Frame subclass. @@ -249,3 +396,269 @@ class AliasingAnalyzer[V <: Value](interpreter: Interpreter[V]) extends Analyzer override def newFrame(nLocals: Int, nStack: Int): AliasingFrame[V] = new AliasingFrame(nLocals, nStack) override def newFrame(src: Frame[_ <: V]): AliasingFrame[V] = new AliasingFrame(src) } + +/** + * An iterator over Int (required to prevent boxing the result of next). 
+ */ +abstract class IntIterator extends Iterator[Int] { + def hasNext: Boolean + def next(): Int +} + +/** + * An efficient mutable bit set. + * + * @param set Either a SmallBitSet or an Array[Long] + * @param size The size of the set, useful for performance of certain operations + */ +class AliasSet(var set: Object /*SmallBitSet | Array[Long]*/, var size: Int) { + import AliasSet._ + + override def toString: String = set.toString + + /** + * An iterator for the elements of this bit set. Note that only one iterator can be used at a + * time. Also make sure not to change the underlying AliasSet during iteration. + */ + def iterator: IntIterator = andNotIterator(this, empty, null) + + def +=(value: Int): Unit = this.set match { + case s: SmallBitSet => (size: @switch) match { + case 0 => s.a = value; size = 1 + case 1 => if (value != s.a) { s.b = value; size = 2 } + case 2 => if (value != s.a && value != s.b) { s.c = value; size = 3 } + case 3 => if (value != s.a && value != s.b && value != s.c) { s.d = value; size = 4 } + case 4 => + if (value != s.a && value != s.b && value != s.c && value != s.d) { + this.set = bsEmpty + this.size = 0 + bsAdd(this, s.a) + bsAdd(this, s.b) + bsAdd(this, s.c) + bsAdd(this, s.d) + bsAdd(this, value) + } + } + case bits: Array[Long] => + bsAdd(this, value) + } + + def -=(value: Int): Unit = this.set match { + case s: SmallBitSet => (size: @switch) match { + case 0 => + case 1 => + if (value == s.a) { s.a = -1; size = 0 } + case 2 => + if (value == s.a) { s.a = s.b; s.b = -1; size = 1 } + else if (value == s.b) { s.b = -1; size = 1 } + case 3 => + if (value == s.a) { s.a = s.b; s.b = s.c; s.c = -1; size = 2 } + else if (value == s.b) { s.b = s.c; s.c = -1; size = 2 } + else if (value == s.c) { s.c = -1; size = 2 } + case 4 => + if (value == s.a) { s.a = s.b; s.b = s.c; s.c = s.d; s.d = -1; size = 3 } + else if (value == s.b) { s.b = s.c; s.c = s.d; s.d = -1; size = 3 } + else if (value == s.c) { s.c = s.d; s.d = -1; size = 3 } + else 
if (value == s.d) { s.d = -1; size = 3 } + } + case bits: Array[Long] => + bsRemove(this, value) + if (this.size == 4) + this.set = bsToSmall(this.set.asInstanceOf[Array[Long]]) + } + + override def clone(): AliasSet = { + val resSet = this.set match { + case s: SmallBitSet => new SmallBitSet(s.a, s.b, s.c, s.d) + case bits: Array[Long] => bits.clone() + } + new AliasSet(resSet, this.size) + } +} + +object AliasSet { + def empty = new AliasSet(new SmallBitSet(-1, -1, -1, -1), 0) + + final class SmallBitSet(var a: Int, var b: Int, var c: Int, var d: Int) { + override def toString = s"($a, $b, $c, $d)" + } + + def bsEmpty: Array[Long] = new Array[Long](1) + + private def bsEnsureCapacity(set: Array[Long], index: Int): Array[Long] = { + if (index < set.length) set + else { + var newLength = set.length + while (index >= newLength) newLength *= 2 + val newSet = new Array[Long](newLength) + Array.copy(set, 0, newSet, 0, set.length) + newSet + } + } + + def bsAdd(set: AliasSet, bit: Int): Unit = { + val bits = set.set.asInstanceOf[Array[Long]] + val index = bit >> 6 + val resSet = bsEnsureCapacity(bits, index) + val before = resSet(index) + val result = before | (1l << bit) + if (result != before) { + resSet(index) = result + set.set = resSet + set.size += 1 + } + } + + def bsRemove(set: AliasSet, bit: Int): Unit = { + val bits = set.set.asInstanceOf[Array[Long]] + val index = bit >> 6 + if (index < bits.length) { + val before = bits(index) + val result = before & ~(1l << bit) + if (result != before) { + bits(index) = result + set.size -= 1 + } + } + } + + def bsContains(set: Array[Long], bit: Int): Boolean = { + val index = bit >> 6 + bit >= 0 && index < set.length && (set(index) & (1L << bit)) != 0L + } + +// var sizesHist: Array[Int] = new Array[Int](1000) + + /** + * Convert a bit array to a SmallBitSet. Requires the bit array to contain exactly four bits. 
+ */ + def bsToSmall(bits: Array[Long]): SmallBitSet = { + var a = -1 + var b = -1 + var c = -1 + var i = 0 + val end = bits.length * 64 + while (i < end) { + if (bsContains(bits, i)) { + if (a == -1) a = i + else if (b == -1) b = i + else if (c == -1) c = i + else return new SmallBitSet(a, b, c, i) + } + i += 1 + } + null + } + + /** + * An iterator that yields the elements that are in one bit set and not in another (&~). + */ + private class AndNotIt(setA: AliasSet, setB: AliasSet, thisAndOther: Array[Boolean]) extends IntIterator { + // values in the first bit set + private var a, b, c, d = -1 + private var xs: Array[Long] = null + + // values in the second bit set + private var notA, notB, notC, notD = -1 + private var notXs: Array[Long] = null + + // holds the next value of `x`, `y` or `z` that should be returned. assigned in hasNext + private var abcdNext = -1 + + // counts through elements in the `xs` bit set + private var i = 0 + // true if the current value of `i` should be returned by this iterator + private var iValid = false + + setA.set match { + case s: SmallBitSet => a = s.a; b = s.b; c = s.c; d = s.d + case bits: Array[Long] => xs = bits + } + + setB.set match { + case s: SmallBitSet => notA = s.a; notB = s.b; notC = s.c; notD = s.d + case bits: Array[Long] => notXs = bits + } + + // for each value that exists both in this AND (&) the other bit, `thisAndOther` is set to true. + // hacky side-effect, used for performance of AliasingFrame.merge. + private def setThisAndOther(x: Int) = if (thisAndOther != null) thisAndOther(x) = true + + private def checkABCD(x: Int, num: Int): Boolean = { + // assert(x == a && num == 1 || x == b && num == 2 || ...) 
+ x != -1 && { + val otherHasA = x == notA || x == notB || x == notC || x == notD || (notXs != null && bsContains(notXs, x)) + if (otherHasA) setThisAndOther(x) + else abcdNext = x + (num: @switch) match { + case 1 => a = -1 + case 2 => b = -1 + case 3 => c = -1 + case 4 => d = -1 + } + !otherHasA + } + } + + // main performance hot spot + private def checkXs = { + (xs != null) && { + val end = xs.length * 64 + + while (i < end && { + val index = i >> 6 + if (xs(index) == 0l) { // boom. for nullness, this saves 35% of the overall analysis time. + i = ((index + 1) << 6) - 1 // -1 required because i is incremented in the loop body + true + } else { + val mask = 1l << i + // if (mask > xs(index)) we could also advance i to the next value, but that didn't pay off in benchmarks + val thisHasI = (xs(index) & mask) != 0l + !thisHasI || { + val otherHasI = i == notA || i == notB || i == notC || i == notD || (notXs != null && index < notXs.length && (notXs(index) & mask) != 0l) + if (otherHasI) setThisAndOther(i) + otherHasI + } + } + }) i += 1 + + iValid = i < end + iValid + } + } + + // this is the main hot spot of alias analysis. for nullness, 38% of the overall analysis time + // is spent here. within hasNext, almost the entire time is spent in `checkXs`. + // + def hasNext: Boolean = iValid || abcdNext != -1 || checkABCD(a, 1) || checkABCD(b, 2) || checkABCD(c, 3) || checkABCD(d, 4) || checkXs + + def next(): Int = { + if (hasNext) { + if (abcdNext != -1) { + val r = abcdNext; abcdNext = -1; r + } else { + val r = i; i += 1; iValid = false; r + } + } else Iterator.empty.next() + } + } + +// The number of bits in a bit array. Useful for debugging. +// def bsSize(bits: Array[Long]) = { +// var r = 0 +// var i = 0 +// while (i < bits.length) { +// r += java.lang.Long.bitCount(bits(i)) +// i += 1 +// } +// r +// } + + /** + * An iterator returning the elements in a that are not also in b (a &~ b). 
+ * + * If `thisAndOther` is non-null, the iterator sets thisAndOther(i) to true for every value that + * is both in a and b (&). + */ + def andNotIterator(a: AliasSet, b: AliasSet, thisAndOther: Array[Boolean]): IntIterator = new AndNotIt(a, b, thisAndOther) +} diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/Analyzers.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/Analyzers.scala new file mode 100644 index 0000000000..bb5c6e3820 --- /dev/null +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/Analyzers.scala @@ -0,0 +1,48 @@ +package scala.tools.nsc +package backend.jvm +package analysis + +import scala.tools.asm.tree.{AbstractInsnNode, MethodNode} +import scala.tools.asm.tree.analysis.{Frame, BasicInterpreter, Analyzer, Value} +import scala.tools.nsc.backend.jvm.BTypes._ +import scala.tools.nsc.backend.jvm.opt.BytecodeUtils._ + +/** + * This component hosts tools for running ASM analyzers that require access to a `BTypes` instance. + * In particular, the AsmAnalyzer class runs `computeMaxLocalsMaxStack` on the methodNode to be + * analyzed. This method in turn lives inside the BTypes assembly because it queries the per-run + * cache `maxLocalsMaxStackComputed` defined in there. + */ +class Analyzers[BT <: BTypes](val btypes: BT) { + import btypes._ + + /** + * A wrapper to make ASM's Analyzer a bit easier to use. + */ + class AsmAnalyzer[V <: Value](methodNode: MethodNode, classInternalName: InternalName, val analyzer: Analyzer[V] = new Analyzer(new BasicInterpreter)) { + localOpt.computeMaxLocalsMaxStack(methodNode) + analyzer.analyze(classInternalName, methodNode) + def frameAt(instruction: AbstractInsnNode): Frame[V] = analyzer.frameAt(instruction, methodNode) + } + + /** + * See the doc comment on package object `analysis` for a discussion on performance. 
+ */ + object AsmAnalyzer { + // jvm limit is 65535 for both number of instructions and number of locals + + private def size(method: MethodNode) = method.instructions.size.toLong * method.maxLocals * method.maxLocals + + // with the limits below, analysis should not take more than one second + + private val nullnessSizeLimit = 5000l * 600l * 600l // 5000 insns, 600 locals + private val basicValueSizeLimit = 9000l * 1000l * 1000l + private val sourceValueSizeLimit = 8000l * 950l * 950l + + def sizeOKForNullness(method: MethodNode): Boolean = size(method) < nullnessSizeLimit + def sizeOKForBasicValue(method: MethodNode): Boolean = size(method) < basicValueSizeLimit + def sizeOKForSourceValue(method: MethodNode): Boolean = size(method) < sourceValueSizeLimit + } + + class ProdConsAnalyzer(val methodNode: MethodNode, classInternalName: InternalName) extends AsmAnalyzer(methodNode, classInternalName, new Analyzer(new InitialProducerSourceInterpreter)) with ProdConsAnalyzerImpl +} diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzer.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzer.scala index 31b62f747e..f9ac12eb4d 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzer.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzer.scala @@ -7,66 +7,12 @@ import java.util import scala.annotation.switch import scala.tools.asm.{Type, Opcodes} import scala.tools.asm.tree.{MethodInsnNode, LdcInsnNode, AbstractInsnNode} -import scala.tools.asm.tree.analysis.{Frame, Analyzer, Interpreter, Value} +import scala.tools.asm.tree.analysis._ import scala.tools.nsc.backend.jvm.opt.BytecodeUtils import BytecodeUtils._ /** - * Some notes on the ASM analyzer framework. - * - * Value - * - Abstract, needs to be implemented for each analysis. - * - Represents the desired information about local variables and stack values, for example: - * - Is this value known to be null / not null? 
- * - What are the instructions that could potentially have produced this value? - * - * Interpreter - * - Abstract, needs to be implemented for each analysis. Sometimes one can subclass an existing - * interpreter, e.g., SourceInterpreter or BasicInterpreter. - * - Multiple abstract methods that receive an instruction and the instruction's input values, and - * return a value representing the result of that instruction. - * - Note: due to control flow, the interpreter can be invoked multiple times for the same - * instruction, until reaching a fixed point. - * - Abstract `merge` function that computes the least upper bound of two values. Used by - * Frame.merge (see below). - * - * Frame - * - Can be used directly for many analyses, no subclass required. - * - Every frame has an array of values: one for each local variable and for each stack slot. - * - A `top` index stores the index of the current stack top - * - NOTE: for a size-2 local variable at index i, the local variable at i+1 is set to an empty - * value. However, for a size-2 value at index i on the stack, the value at i+1 holds the next - * stack value. - * - Defines the `execute(instruction)` method. - * - executing mutates the state of the frame according to the effect of the instruction - * - pop consumed values from the stack - * - pass them to the interpreter together with the instruction - * - if applicable, push the resulting value on the stack - * - Defines the `merge(otherFrame)` method - * - called by the analyzer when multiple control flow paths lead to an instruction - * - the frame at the branching instruction is merged into the current frame of the - * instruction (held by the analyzer) - * - mutates the values of the current frame, merges all values using interpreter.merge. 
- * - * Analyzer - * - Stores a frame for each instruction - * - `merge` function takes an instruction and a frame, merges the existing frame for that instr - * (from the frames array) with the new frame passed as argument. - * if the frame changed, puts the instruction on the work queue (fixpiont). - * - initial frame: initialized for first instr by calling interpreter.new[...]Value - * for each slot (locals and params), stored in frames[firstInstr] by calling `merge` - * - work queue of instructions (`queue` array, `top` index for next instruction to analyze) - * - analyze(method): simulate control flow. while work queue non-empty: - * - copy the state of `frames[instr]` into a local frame `current` - * - call `current.execute(instr, interpreter)`, mutating the `current` frame - * - if it's a branching instruction - * - for all potential destination instructions - * - merge the destination instruction frame with the `current` frame - * (this enqueues the destination instr if its frame changed) - * - invoke `newControlFlowEdge` (see below) - * - the analyzer also tracks active exception handlers at each instruction - * - the empty method `newControlFlowEdge` can be overridden to track control flow if required - * + * See the package object `analysis` for details on the ASM analysis framework. * * Some notes on nullness analysis. * @@ -87,56 +33,34 @@ import BytecodeUtils._ */ /** - * Type to represent nullness of values. - */ -sealed trait Nullness { - final def merge(other: Nullness) = if (this == other) this else Unknown -} -case object NotNull extends Nullness -case object Unknown extends Nullness -case object Null extends Nullness - -/** * Represents the nullness state for a local variable or stack value. * - * Note that nullness of primitive values is not tracked, it will be always [[Unknown]]. + * Note that nullness of primitive values is not tracked, it will be always unknown. 
*/ -sealed trait NullnessValue extends Value { - /** - * The nullness of this value. - */ - def nullness: Nullness - - /** - * True if this value is a long or double. The Analyzer framework needs to know - * the size of each value when interpreting instructions, see `Frame.execute`. - */ - def isSize2: Boolean +sealed abstract class NullnessValue(final val isSize2: Boolean) extends Value { /** * The size of the slot described by this value. Cannot be 0 because no values are allocated * for void-typed slots, see NullnessInterpreter.newValue. **/ def getSize: Int = if (isSize2) 2 else 1 - def merge(other: NullnessValue) = NullnessValue(nullness merge other.nullness, isSize2) + def merge(other: NullnessValue) = { + if (this eq other) this + else if (this eq UnknownValue2) this // the only possible value of size two + else UnknownValue1 + } + + final override def equals(other: Any) = this eq other.asInstanceOf[Object] } -object NullValue extends NullnessValue { def nullness = Null; def isSize2 = false; override def toString = "Null" } -object UnknownValue1 extends NullnessValue { def nullness = Unknown; def isSize2 = false; override def toString = "Unknown1" } -object UnknownValue2 extends NullnessValue { def nullness = Unknown; def isSize2 = true; override def toString = "Unknown2" } -object NotNullValue extends NullnessValue { def nullness = NotNull; def isSize2 = false; override def toString = "NotNull" } +object NullValue extends NullnessValue(isSize2 = false) { override def toString = "Null" } +object UnknownValue1 extends NullnessValue(isSize2 = false) { override def toString = "Unknown1" } +object UnknownValue2 extends NullnessValue(isSize2 = true ) { override def toString = "Unknown2" } +object NotNullValue extends NullnessValue(isSize2 = false) { override def toString = "NotNull" } object NullnessValue { - def apply(nullness: Nullness, isSize2: Boolean): NullnessValue = { - if (nullness == Null) NullValue - else if (nullness == NotNull) NotNullValue - else if 
(isSize2) UnknownValue2 - else UnknownValue1 - } - - def apply(nullness: Nullness, insn: AbstractInsnNode): NullnessValue = { - apply(nullness, isSize2 = BytecodeUtils.instructionResultSize(insn) == 2) - } + def unknown(isSize2: Boolean) = if (isSize2) UnknownValue2 else UnknownValue1 + def unknown(insn: AbstractInsnNode) = if (BytecodeUtils.instructionResultSize(insn) == 2) UnknownValue2 else UnknownValue1 } final class NullnessInterpreter extends Interpreter[NullnessValue](Opcodes.ASM5) { @@ -151,29 +75,25 @@ final class NullnessInterpreter extends Interpreter[NullnessValue](Opcodes.ASM5) // (2) `tp` may also be `null`. When creating the initial frame, the analyzer invokes // `newValue(null)` for each local variable. We have to return a value of size 1. if (tp == Type.VOID_TYPE) null // (1) - else NullnessValue(Unknown, isSize2 = tp != null /*(2)*/ && tp.getSize == 2 ) + else NullnessValue.unknown(isSize2 = tp != null /*(2)*/ && tp.getSize == 2 ) } override def newParameterValue(isInstanceMethod: Boolean, local: Int, tp: Type): NullnessValue = { // For instance methods, the `this` parameter is known to be not null. - if (isInstanceMethod && local == 0) NullnessValue(NotNull, isSize2 = false) + if (isInstanceMethod && local == 0) NotNullValue else super.newParameterValue(isInstanceMethod, local, tp) } - def newOperation(insn: AbstractInsnNode): NullnessValue = { - val nullness = (insn.getOpcode: @switch) match { - case Opcodes.ACONST_NULL => Null + def newOperation(insn: AbstractInsnNode): NullnessValue = (insn.getOpcode: @switch) match { + case Opcodes.ACONST_NULL => NullValue - case Opcodes.LDC => insn.asInstanceOf[LdcInsnNode].cst match { - case _: String | _: Type => NotNull - case _ => Unknown - } - - case _ => Unknown + case Opcodes.LDC => insn.asInstanceOf[LdcInsnNode].cst match { + case _: String | _: Type => NotNullValue + case _ => NullnessValue.unknown(insn) } // for Opcodes.NEW, we use Unknown. The value will become NotNull after the constructor call. 
- NullnessValue(nullness, insn) + case _ => NullnessValue.unknown(insn) } def copyOperation(insn: AbstractInsnNode, value: NullnessValue): NullnessValue = value @@ -182,26 +102,24 @@ final class NullnessInterpreter extends Interpreter[NullnessValue](Opcodes.ASM5) case Opcodes.CHECKCAST => value case Opcodes.NEWARRAY | - Opcodes.ANEWARRAY => NullnessValue(NotNull, isSize2 = false) + Opcodes.ANEWARRAY => NotNullValue - case _ => NullnessValue(Unknown, insn) + case _ => NullnessValue.unknown(insn) } def binaryOperation(insn: AbstractInsnNode, value1: NullnessValue, value2: NullnessValue): NullnessValue = { - NullnessValue(Unknown, insn) + NullnessValue.unknown(insn) } - def ternaryOperation(insn: AbstractInsnNode, value1: NullnessValue, value2: NullnessValue, value3: NullnessValue): NullnessValue = { - NullnessValue(Unknown, isSize2 = false) - } + def ternaryOperation(insn: AbstractInsnNode, value1: NullnessValue, value2: NullnessValue, value3: NullnessValue): NullnessValue = UnknownValue1 def naryOperation(insn: AbstractInsnNode, values: util.List[_ <: NullnessValue]): NullnessValue = (insn.getOpcode: @switch) match { case Opcodes.MULTIANEWARRAY => - NullnessValue(NotNull, isSize2 = false) + NotNullValue case _ => // TODO: use a list of methods that are known to return non-null values - NullnessValue(Unknown, insn) + NullnessValue.unknown(insn) } def returnOperation(insn: AbstractInsnNode, value: NullnessValue, expected: NullnessValue): Unit = () @@ -219,8 +137,10 @@ class NullnessFrame(nLocals: Int, nStack: Int) extends AliasingFrame[NullnessVal override def execute(insn: AbstractInsnNode, interpreter: Interpreter[NullnessValue]): Unit = { import Opcodes._ - // get the object id of the object that is known to be not-null after this operation - val nullCheckedAliasId: Long = (insn.getOpcode: @switch) match { + // get the alias set the object that is known to be not-null after this operation. 
+ // alias sets are mutable / mutated, so after super.execute, this set contains the remaining + // aliases of the value that becomes not-null. + val nullCheckedAliases: AliasSet = (insn.getOpcode: @switch) match { case IALOAD | LALOAD | FALOAD | @@ -229,7 +149,7 @@ class NullnessFrame(nLocals: Int, nStack: Int) extends AliasingFrame[NullnessVal BALOAD | CALOAD | SALOAD => - aliasId(this.stackTop - 1) + aliasesOf(this.stackTop - 1) case IASTORE | FASTORE | @@ -239,35 +159,36 @@ class NullnessFrame(nLocals: Int, nStack: Int) extends AliasingFrame[NullnessVal SASTORE | LASTORE | DASTORE => - aliasId(this.stackTop - 2) + aliasesOf(this.stackTop - 2) case GETFIELD => - aliasId(this.stackTop) + aliasesOf(this.stackTop) case PUTFIELD => - aliasId(this.stackTop - 1) + aliasesOf(this.stackTop - 1) case INVOKEVIRTUAL | INVOKESPECIAL | INVOKEINTERFACE => val desc = insn.asInstanceOf[MethodInsnNode].desc val numArgs = Type.getArgumentTypes(desc).length - aliasId(this.stackTop - numArgs) + aliasesOf(this.stackTop - numArgs) case ARRAYLENGTH | MONITORENTER | MONITOREXIT => - aliasId(this.stackTop) + aliasesOf(this.stackTop) case _ => - -1 + null } super.execute(insn, interpreter) - if (nullCheckedAliasId != -1) { - for (i <- valuesWithAliasId(nullCheckedAliasId)) - this.setValue(i, NotNullValue) + if (nullCheckedAliases != null) { + val it = nullCheckedAliases.iterator + while (it.hasNext) + this.setValue(it.next(), NotNullValue) } } } diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzer.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzerImpl.scala index 1c24acba03..ce2fe943e4 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzer.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzerImpl.scala @@ -55,24 +55,16 @@ import scala.collection.convert.decorateAsScala._ * * If ever needed, we could introduce a mode where primitive conversions (l2i) are considered as * copying operations. 
+ * + * Note on performance: the data flow analysis (SourceValue / SourceInterpreter, provided by ASM) + * is roughly 2-3x slower than a simple analysis (like BasicValue). The reason is that the merge + * function (merging producer sets) is more complex than merging simple basic values. + * See also the doc comment in the package object `analysis`. */ -class ProdConsAnalyzer(methodNode: MethodNode, classInternalName: InternalName) { - - /* Timers for benchmarking ProdCons - import scala.reflect.internal.util.Statistics._ - import ProdConsAnalyzer._ - val analyzerTimer = newSubTimer(classInternalName + "#" + methodNode.name + " - analysis", prodConsAnalyzerTimer) - val consumersTimer = newSubTimer(classInternalName + "#" + methodNode.name + " - consumers", prodConsAnalyzerTimer) - */ - - val analyzer = new Analyzer(new InitialProducerSourceInterpreter) +trait ProdConsAnalyzerImpl { + val methodNode: MethodNode -// val start = analyzerTimer.start() - analyzer.analyze(classInternalName, methodNode) -// analyzerTimer.stop(start) -// println(analyzerTimer.line) - - def frameAt(insn: AbstractInsnNode) = analyzer.frameAt(insn, methodNode) + def frameAt(insn: AbstractInsnNode): Frame[SourceValue] /** * Returns the potential producer instructions of a (local or stack) value in the frame of `insn`. @@ -404,7 +396,6 @@ class ProdConsAnalyzer(methodNode: MethodNode, classInternalName: InternalName) /** For each instruction, a set of potential consumers of the produced values. 
*/ private lazy val _consumersOfOutputsFrom: Map[AbstractInsnNode, Vector[Set[AbstractInsnNode]]] = { -// val start = consumersTimer.start() var res = Map.empty[AbstractInsnNode, Vector[Set[AbstractInsnNode]]] for { insn <- methodNode.instructions.iterator.asScala @@ -417,8 +408,6 @@ class ProdConsAnalyzer(methodNode: MethodNode, classInternalName: InternalName) val outputIndex = producedSlots.indexOf(i) res = res.updated(producer, currentConsumers.updated(outputIndex, currentConsumers(outputIndex) + insn)) } -// consumersTimer.stop(start) -// println(consumersTimer.line) res } @@ -426,11 +415,6 @@ class ProdConsAnalyzer(methodNode: MethodNode, classInternalName: InternalName) private val _ultimateConsumersCache: mutable.AnyRefMap[(AbstractInsnNode, Int), Set[AbstractInsnNode]] = mutable.AnyRefMap.empty } -object ProdConsAnalyzer { - import scala.reflect.internal.util.Statistics._ - val prodConsAnalyzerTimer = newTimer("Time in ProdConsAnalyzer", "jvm") -} - /** * A class for pseudo-instructions representing the initial producers of local values that have * no producer instruction in the method: diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/package.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/package.scala new file mode 100644 index 0000000000..f1ac703532 --- /dev/null +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/package.scala @@ -0,0 +1,374 @@ +package scala.tools.nsc.backend.jvm + +/** + * Summary on the ASM analyzer framework + * -------------------------------------- + * + * Value + * - Abstract, needs to be implemented for each analysis. + * - Represents the desired information about local variables and stack values, for example: + * - Is this value known to be null / not null? + * - What are the instructions that could potentially have produced this value? + * + * Interpreter + * - Abstract, needs to be implemented for each analysis. 
Sometimes one can subclass an existing + * interpreter, e.g., SourceInterpreter or BasicInterpreter. + * - Multiple abstract methods that receive an instruction and the instruction's input values, and + * return a value representing the result of that instruction. + * - Note: due to control flow, the interpreter can be invoked multiple times for the same + * instruction, until reaching a fixed point. + * - Abstract `merge` function that computes the least upper bound of two values. Used by + * Frame.merge (see below). + * + * Frame + * - Can be used directly for many analyses, no subclass required. + * - Every frame has an array of values: one for each local variable and for each stack slot. + * - A `top` index stores the index of the current stack top + * - NOTE: for a size-2 local variable at index i, the local variable at i+1 is set to an empty + * value. However, for a size-2 value at index i on the stack, the value at i+1 holds the next + * stack value. IMPORTANT: this is only the case in ASM's analysis framework, not in bytecode. + * See comment below. + * - Defines the `execute(instruction)` method. + * - executing mutates the state of the frame according to the effect of the instruction + * - pop consumed values from the stack + * - pass them to the interpreter together with the instruction + * - if applicable, push the resulting value on the stack + * - Defines the `merge(otherFrame)` method + * - called by the analyzer when multiple control flow paths lead to an instruction + * - the frame at the branching instruction is merged into the current frame of the + * instruction (held by the analyzer) + * - mutates the values of the current frame, merges all values using interpreter.merge. + * + * Analyzer + * - Stores a frame for each instruction + * - `merge` function takes an instruction and a frame, merges the existing frame for that instr + * (from the frames array) with the new frame passed as argument. 
+ * if the frame changed, puts the instruction on the work queue (fixpoint). + * - initial frame: initialized for first instr by calling interpreter.new[...]Value + * for each slot (locals and params), stored in frames[firstInstr] by calling `merge` + * - work queue of instructions (`queue` array, `top` index for next instruction to analyze) + * - analyze(method): simulate control flow. while work queue non-empty: + * - copy the state of `frames[instr]` into a local frame `current` + * - call `current.execute(instr, interpreter)`, mutating the `current` frame + * - if it's a branching instruction + * - for all potential destination instructions + * - merge the destination instruction frame with the `current` frame + * (this enqueues the destination instr if its frame changed) + * - invoke `newControlFlowEdge` (see below) + * - the analyzer also tracks active exception handlers at each instruction + * - the empty method `newControlFlowEdge` can be overridden to track control flow if required + * + * + * MaxLocals and MaxStack + * ---------------------- + * + * At the JVM level, long and double values occupy two slots, both as local variables and on the + * stack, as specified in the JVM spec 2.6.2: + * "At any point in time, an operand stack has an associated depth, where a value of type long or + * double contributes two units to the depth and a value of any other type contributes one unit." + * + * For example, a method + * class A { def f(a: Long, b: Long) = a + b } + * has MAXSTACK=4 in the classfile. This value is computed by the ClassWriter / MethodWriter when + * generating the classfile (we always pass COMPUTE_MAXS to the ClassWriter). 
+ * + * For running an ASM Analyzer, long and double values occupy two local variable slots, but only + * a single slot on the call stack, as shown by the following snippet: + * + * import scala.tools.nsc.backend.jvm._ + * import scala.tools.nsc.backend.jvm.opt.BytecodeUtils._ + * import scala.collection.convert.decorateAsScala._ + * import scala.tools.asm.tree.analysis._ + * + * val cn = AsmUtils.readClass("/Users/luc/scala/scala/sandbox/A.class") + * val m = cn.methods.iterator.asScala.find(_.name == "f").head + * + * // the value is read from the classfile, so it's 4 + * println(s"maxLocals: ${m.maxLocals}, maxStack: ${m.maxStack}") // maxLocals: 5, maxStack: 4 + * + * // we can safely set it to 2 for running the analyzer. + * m.maxStack = 2 + * + * val a = new Analyzer(new BasicInterpreter) + * a.analyze(cn.name, m) + * val addInsn = m.instructions.iterator.asScala.find(_.getOpcode == 97).get // LADD Opcode + * val addFrame = a.frameAt(addInsn, m) + * + * addFrame.getStackSize // 2: the two long values only take one slot each + * addFrame.getLocals // 5: this takes one slot, the two long parameters take 2 slots each + * + * + * While running the optimizer, we need to make sure that the `maxStack` value of a method is + * large enough for running an ASM analyzer. We don't need to worry if the value is incorrect in + * the JVM perspective: the value will be re-computed and overwritten in the ClassWriter. + * + * + * Lessons learnt while benchmarking the alias tracking analysis + * ------------------------------------------------------------- + * + * Profiling + * - Use YourKit for finding hotspots (cpu profiling). when it comes to drilling down into the details + * of a hotspot, don't pay too much attention to the percentages / time counts. + * - Should also try other profilers. + * - Use timers. When a method showed up as a hotspot, i added a timer around that method, and a + * second one within the method to measure specific parts. 
The timers slow things down, but the + * relative numbers show what parts of a method are slow. + * + * ASM analyzer insights + * - The time for running an analysis depends on the number of locals and the number of instructions. + * Reducing the number of locals helps speed up the analysis: there are fewer values to + * merge when merging two frames. + * See also https://github.com/scala/scala-dev/issues/47 + * - The common hot spot of an ASM analysis is Frame.merge, for example in producers / consumers. + * - For nullness analysis the time is spent as follows + * - 20% merging nullness values. This is as expected: for example, the same absolute amount of + * time is spent in merging BasicValues when running a BasicInterpreter. + * - 50% merging alias sets. I tried to optimize what I could out of this. + * - 20% is spent creating new frames from existing ones, see comment on AliasingFrame.init. + * - The implementation of Frame.merge (the main hot spot) contains a megamorphic callsite to + * `interpreter.merge`. This can be observed easily by running a test program that either runs + * a BasicValue analysis only, versus a program that first runs a nullness analysis and then + * a BasicValue. In an example, the time for the BasicValue analysis goes from 519ms to 1963ms, + * a 3.8x slowdown. + * - I added counters to the Frame.merge methods for nullness and BasicValue analysis. In the + * examples I benchmarked, the number of merge invocations was always exactly the same. + * It would probably be possible to come up with an example where alias set merging forces + * additional analysis rounds until reaching the fixpoint, but I did not observe such cases. + * + * To benchmark an analysis, instead of benchmarking analysis while it runs in the compiler + * backend, one can easily run it from a separate program (or the repl). The bytecode to analyze + * can simply be parsed from a classfile. See example at the end of this comment. 
+ * + * + * Nullness Analysis in Miguel's Optimizer + * --------------------------------------- + * + * Miguel implemented alias tracking for nullness analysis differently [1]. Remember that every + * frame has an array of values. Miguel's idea was to represent aliasing using reference equality + * in the values array: if two entries in the array point to the same value object, the two entries + * are aliases in the frame of the given instruction. + * + * While this idea seems elegant at first sight, Miguel's implementation does not merge frames + * correctly when it comes to aliasing. Assume in frame 1, values (a, b, c) are aliases, while in + * frame 2 (a, b) are aliases. When merging the second into the first, we have to make sure that + * c is removed as an alias of (a, b). + * + * It would be possible to implement correct alias set merging in Miguel's approach. However, frame + * merging is the main hot spot of analysis. The computational complexity of implementing alias set + * merging by traversing the values array and comparing references is too high. The concrete + * alias set representation that is used in the current implementation (see class AliasingFrame) + * makes alias set merging more efficient. + * + * [1] https://github.com/scala-opt/scala/blob/opt/rebase/src/compiler/scala/tools/nsc/backend/bcode/NullnessPropagator.java + * + * + * Complexity and scaling of analysis + * ---------------------------------- + * + * The time complexity of a data flow analysis depends on: + * + * - The size of the method. The complexity factor is linear (assuming the number of locals and + * branching instructions remains constant). The main analysis loop runs through all + * instructions of a method once. Instructions are only re-enqueued if a control flow merge + * changes the frame at some instruction. + * + * - The branching instructions. When a second (third, ..) 
control flow edge arrives at an + * instruction, the existing frame at the instruction is merged with the one computed on the + * new branch. If the merge function changes the existing frame, the instruction is enqueued + * for another analysis. This results in a merge operation for the successors of the + * instruction. + * + * - The number of local variables. The hot spot of analysis is frame merging. The merge function + * iterates through the values in the frame (locals and stack values) and merges them. + * + * I measured the running time of an analysis for two examples: + * - Keep the number of locals and branching instructions constant, increase the number of + * instructions. The running time grows linearly with the method size. + * - Increase the size and number of locals in a method. The method size and number of locals + * grow at the same pace. Here, the running time increase is polynomial. It looks like the + * complexity is #instructions * #locals^2 (see below). + * + * I measured nullness analysis (which tracks aliases) and a SimpleValue analysis. Nullness runs + * roughly 5x slower (because of alias tracking) at every problem size - this factor doesn't change. + * + * The numbers below are for nullness. Note that the last column is constant, i.e., the running + * time is proportional to #ins * #loc^2. Therefore we use this factor when limiting the maximal + * method size for running an analysis. + * + * #insns #locals time (ms) time / #ins * #loc^2 * 10^6 + * 1305 156 34 1.07 + * 2610 311 165 0.65 + * 3915 466 490 0.57 + * 5220 621 1200 0.59 + * 6525 776 2220 0.56 + * 7830 931 3830 0.56 + * 9135 1086 6570 0.60 + * 10440 1241 9700 0.60 + * 11745 1396 13800 0.60 + * + * As a second experiment, nullness analysis was run with varying #insns but constant #locals. 
+ * The last column shows linear complexity with respect to the method size (linearOffset = 2279): + * + * #insns #locals time (ms) (time + linearOffset) / #insns + * 5220 621 1090 0.645 + * 6224 621 1690 0.637 + * 7226 621 2280 0.630 + * 8228 621 2870 0.625 + * 9230 621 3530 0.629 + * 10232 621 4130 0.626 + * 11234 621 4770 0.627 + * 12236 621 5520 0.637 + * 13238 621 6170 0.638 + * + * + * When running a BasicValue analysis, the complexity observation is the same (time is proportional + * to #ins * #loc^2). + * + * + * Measuring analysis execution time + * --------------------------------- + * + * See code below. + */ + +/* +object Test { + val overwrite: Option[String] = null + + @noinline def serialize(o: AnyRef): String = null + + @noinline def deserialize(string: String): AnyRef = null + + @inline def checkRoundTrip[T <: AnyRef](instance: T)(f: T => AnyRef) { + val result = serialize(instance) + val reconstituted = deserialize(result).asInstanceOf[T] + assert(f(instance) == f(reconstituted), (f(instance), f(reconstituted))) + } + + @inline def check[T <: AnyRef](instance: => T)(prevResult: String, f: T => AnyRef = (x: T) => x) { + // pattern match to introduce a lot of control flow, i.e., a lot of frame merges + overwrite match { + case Some(f) => + case None => + checkRoundTrip(instance)(f) + assert(f(deserialize(prevResult).asInstanceOf[T]) == f(instance), instance) + assert(prevResult == "res", instance) + } + } + + // @inline def fun[T <: AnyRef](instance: => T) = (x: T) => x + + def testMain(): Unit = { + // every call to check creates quite a number of locals, and also quite a number of aliases + // of the same value (x1). First of all, the default argument call is expanded as below. Then + // method check is inlined, and within the body of check, checkRoundTrip and assert have + // already been inlined as well. 
+ + // { + // val x1 = () => "" + // val x2 = fun(x1()) // the compiler optimizes this: instead of passing `() => x1()`, it just passes x1 + // check(x1())("", x2) // same here for x1 + // } + + check("")("") + check("")("") + check("")("") + check("")("") + check("")("") // 5 + check("")("") + check("")("") + check("")("") + check("")("") + check("")("") // 10 + check("")("") + check("")("") + check("")("") + check("")("") + check("")("") // 15 + check("")("") + check("")("") + check("")("") + check("")("") + check("")("") // 20 + check("")("") + check("")("") + check("")("") + check("")("") + check("")("") // 25 + check("")("") + check("")("") + check("")("") + check("")("") + check("")("") // 30 + check("")("") + check("")("") + check("")("") + check("")("") + check("")("") // 35 + check("")("") + check("")("") + check("")("") + check("")("") + check("")("") // 40 + // check("")("") + // check("")("") + // check("")("") + // check("")("") + // check("")("") // 45 + // check("")("") + // check("")("") + // check("")("") + // check("")("") + // check("")("") // 50 + // check("")("") + // check("")("") + // check("")("") + // check("")("") + // check("")("") // 55 + + // 1000 bytecode instructions, 0 locals + // println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); 
println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); + } + + def timed[T](f: => T): T = { + val start = System.nanoTime() + val r = f + val nanos = System.nanoTime() - start + println(s"took ${nanos/1000000}ms") + r + } + + def main(args: Array[String]): Unit = { + import scala.tools.nsc.backend.jvm._ + val cn = AsmUtils.readClass("/Users/luc/scala/scala/sandbox/Test$.class") + import scala.collection.convert.decorateAsScala._ + val m = cn.methods.iterator.asScala.find(_.name == "testMain").head + + println(s"${m.instructions.size} instructions - ${m.maxLocals} locals") + + val a = new analysis.NullnessAnalyzer + a.analyze(cn.name, m) // warm up + + analysis.AliasingFrame.reset() + timed(a.analyze(cn.name, m)) + analysis.AliasingFrame.timers foreach println + + println("---") + + // NOTE: if we don't run nullness analysis above (comment it out), then the BasicValue + // analysis runs 3.5x faster. Most likely because the call to Interpreter.merge inside + // Frame.merge is no longer megamorphic. 
+ + import scala.tools.asm.tree.analysis._ + val ba = new Analyzer(new BasicInterpreter) + ba.analyze(cn.name, m) // warm up + + timed(ba.analyze(cn.name, m)) + + println("---") + + timed(a.analyze(cn.name, m)) + } +} +*/ +package object analysis diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/ByteCodeRepository.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/ByteCodeRepository.scala index a5b85e54e7..4492d0baf5 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/ByteCodeRepository.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/ByteCodeRepository.scala @@ -10,6 +10,7 @@ package opt import scala.tools.asm import asm.tree._ import scala.collection.convert.decorateAsScala._ +import scala.collection.concurrent import scala.tools.asm.Attribute import scala.tools.nsc.backend.jvm.BackendReporting._ import scala.tools.nsc.io.AbstractFile @@ -24,39 +25,52 @@ import java.util.concurrent.atomic.AtomicLong * classpath. Parsed classes are cached in the `classes` map. * * @param classPath The compiler classpath where classfiles are searched and read from. - * @param classes Cache for parsed ClassNodes. Also stores the source of the bytecode: - * [[Classfile]] if read from `classPath`, [[CompilationUnit]] if the bytecode - * corresponds to a class being compiled. - * The `Long` field encodes the age of the node in the map, which allows removing - * old entries when the map grows too large. - * For Java classes in mixed compilation, the map contains an error message: no - * ClassNode is generated by the backend and also no classfile that could be parsed. 
*/ -class ByteCodeRepository(val classPath: ClassFileLookup[AbstractFile], val isJavaSourceDefined: InternalName => Boolean, val classes: collection.concurrent.Map[InternalName, Either[ClassNotFound, (ClassNode, Source, Long)]]) { +class ByteCodeRepository[BT <: BTypes](val classPath: ClassFileLookup[AbstractFile], val btypes: BT) { + import btypes._ + + /** + * ClassNodes for classes being compiled in the current compilation run. + */ + val compilingClasses: concurrent.Map[InternalName, ClassNode] = recordPerRunCache(concurrent.TrieMap.empty) + + /** + * Cache for parsed ClassNodes. + * The `Long` field encodes the age of the node in the map, which allows removing old entries when + * the map grows too large (see limitCacheSize). + * For Java classes in mixed compilation, the map contains an error message: no ClassNode is + * generated by the backend and also no classfile that could be parsed. + */ + val parsedClasses: concurrent.Map[InternalName, Either[ClassNotFound, (ClassNode, Long)]] = recordPerRunCache(concurrent.TrieMap.empty) private val maxCacheSize = 1500 private val targetSize = 500 - private val idCounter = new AtomicLong(0) + private object lruCounter extends AtomicLong(0l) with collection.generic.Clearable { + def clear(): Unit = { this.set(0l) } + } + recordPerRunCache(lruCounter) /** * Prevent the code repository from growing too large. Profiling reveals that the average size * of a ClassNode is about 30 kb. I observed having 17k+ classes in the cache, i.e., 500 mb. - * - * We can only remove classes with `Source == Classfile`, those can be parsed again if requested. 
*/ private def limitCacheSize(): Unit = { - if (classes.count(c => c._2.isRight && c._2.right.get._2 == Classfile) > maxCacheSize) { - val removeId = idCounter.get - targetSize - val toRemove = classes.iterator.collect({ - case (name, Right((_, Classfile, id))) if id < removeId => name - }).toList - toRemove foreach classes.remove + if (parsedClasses.size > maxCacheSize) { + // OK if multiple threads get here + val minimalLRU = parsedClasses.valuesIterator.collect({ + case Right((_, lru)) => lru + }).toList.sorted(Ordering.Long.reverse).drop(targetSize).headOption.getOrElse(Long.MaxValue) + parsedClasses retain { + case (_, Right((_, lru))) => lru > minimalLRU + case _ => false + } } } def add(classNode: ClassNode, source: Source) = { - classes(classNode.name) = Right((classNode, source, idCounter.incrementAndGet())) + if (source == CompilationUnit) compilingClasses(classNode.name) = classNode + else parsedClasses(classNode.name) = Right((classNode, lruCounter.incrementAndGet())) } /** @@ -64,18 +78,32 @@ class ByteCodeRepository(val classPath: ClassFileLookup[AbstractFile], val isJav * parsed from the classfile on the compile classpath. */ def classNodeAndSource(internalName: InternalName): Either[ClassNotFound, (ClassNode, Source)] = { - val r = classes.getOrElseUpdate(internalName, { - limitCacheSize() - parseClass(internalName).map((_, Classfile, idCounter.incrementAndGet())) + classNode(internalName) map (n => { + val source = if (compilingClasses contains internalName) CompilationUnit else Classfile + (n, source) }) - r.map(v => (v._1, v._2)) } /** * The class node for an internal name. If the class node is not yet available, it is parsed from * the classfile on the compile classpath. 
*/ - def classNode(internalName: InternalName): Either[ClassNotFound, ClassNode] = classNodeAndSource(internalName).map(_._1) + def classNode(internalName: InternalName): Either[ClassNotFound, ClassNode] = { + compilingClasses.get(internalName).map(Right(_)) getOrElse { + val r = parsedClasses.get(internalName) match { + case Some(l @ Left(_)) => l + case Some(r @ Right((classNode, _))) => + parsedClasses(internalName) = Right((classNode, lruCounter.incrementAndGet())) + r + case None => + limitCacheSize() + val res = parseClass(internalName).map((_, lruCounter.incrementAndGet())) + parsedClasses(internalName) = res + res + } + r.map(_._1) + } + } /** * The field node for a field matching `name` and `descriptor`, accessed in class `classInternalName`. @@ -86,7 +114,6 @@ class ByteCodeRepository(val classPath: ClassFileLookup[AbstractFile], val isJav */ def fieldNode(classInternalName: InternalName, name: String, descriptor: String): Either[FieldNotFound, (FieldNode, InternalName)] = { def fieldNodeImpl(parent: InternalName): Either[FieldNotFound, (FieldNode, InternalName)] = { - def msg = s"The field node $name$descriptor could not be found in class $classInternalName or any of its superclasses." classNode(parent) match { case Left(e) => Left(FieldNotFound(name, descriptor, classInternalName, Some(e))) case Right(c) => @@ -105,6 +132,11 @@ class ByteCodeRepository(val classPath: ClassFileLookup[AbstractFile], val isJav * The method node for a method matching `name` and `descriptor`, accessed in class `ownerInternalNameOrArrayDescriptor`. * The declaration of the method may be in one of the parents. * + * TODO: make sure we always return the right method, the one being invoked. write tests. + * - if there's an abstract and a concrete one. could possibly somehow the abstract be returned? + * - with traits and default methods, if there is more than one default method inherited and + * no override: what should be returned? We should not just inline one of the two. 
+ * * @return The [[MethodNode]] of the requested method and the [[InternalName]] of its declaring * class, or an error message if the method could not be found. */ @@ -157,7 +189,7 @@ class ByteCodeRepository(val classPath: ClassFileLookup[AbstractFile], val isJav classNode } match { case Some(node) => Right(node) - case None => Left(ClassNotFound(internalName, isJavaSourceDefined(internalName))) + case None => Left(ClassNotFound(internalName, javaDefinedClasses(internalName))) } } } diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/BytecodeUtils.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/BytecodeUtils.scala index df8dcc690a..ea186f9a1b 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/BytecodeUtils.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/BytecodeUtils.scala @@ -173,6 +173,11 @@ object BytecodeUtils { case Opcodes.IFNONNULL => Opcodes.IFNULL } + def isSize2LoadOrStore(opcode: Int): Boolean = (opcode: @switch) match { + case Opcodes.LLOAD | Opcodes.DLOAD | Opcodes.LSTORE | Opcodes.DSTORE => true + case _ => false + } + def getPop(size: Int): InsnNode = { val op = if (size == 1) Opcodes.POP else Opcodes.POP2 new InsnNode(op) @@ -222,29 +227,6 @@ object BytecodeUtils { } } - /** - * In order to run an Analyzer, the maxLocals / maxStack fields need to be available. The ASM - * framework only computes these values during bytecode generation. - * - * Since there's currently no better way, we run a bytecode generator on the method and extract - * the computed values. This required changes to the ASM codebase: - * - the [[MethodWriter]] class was made public - * - accessors for maxLocals / maxStack were added to the MethodWriter class - * - * We could probably make this faster (and allocate less memory) by hacking the ASM framework - * more: create a subclass of MethodWriter with a /dev/null byteVector. 
Another option would be - * to create a separate visitor for computing those values, duplicating the functionality from the - * MethodWriter. - */ - def computeMaxLocalsMaxStack(method: MethodNode): Unit = { - val cw = new ClassWriter(ClassWriter.COMPUTE_MAXS) - val excs = method.exceptions.asScala.toArray - val mw = cw.visitMethod(method.access, method.name, method.desc, method.signature, excs).asInstanceOf[MethodWriter] - method.accept(mw) - method.maxLocals = mw.getMaxLocals - method.maxStack = mw.getMaxStack - } - def codeSizeOKForInlining(caller: MethodNode, callee: MethodNode): Boolean = { // Looking at the implementation of CodeSizeEvaluator, all instructions except tableswitch and // lookupswitch are <= 8 bytes. These should be rare enough for 8 to be an OK rough upper bound. @@ -308,14 +290,14 @@ object BytecodeUtils { * Clone the local variable descriptors of `methodNode` and map their `start` and `end` labels * according to the `labelMap`. */ - def cloneLocalVariableNodes(methodNode: MethodNode, labelMap: Map[LabelNode, LabelNode], prefix: String): List[LocalVariableNode] = { + def cloneLocalVariableNodes(methodNode: MethodNode, labelMap: Map[LabelNode, LabelNode], prefix: String, shift: Int): List[LocalVariableNode] = { methodNode.localVariables.iterator().asScala.map(localVariable => new LocalVariableNode( prefix + localVariable.name, localVariable.desc, localVariable.signature, labelMap(localVariable.start), labelMap(localVariable.end), - localVariable.index + localVariable.index + shift )).toList } @@ -352,15 +334,6 @@ object BytecodeUtils { } } - /** - * A wrapper to make ASM's Analyzer a bit easier to use. 
- */ - class AsmAnalyzer[V <: Value](methodNode: MethodNode, classInternalName: InternalName, interpreter: Interpreter[V] = new BasicInterpreter) { - val analyzer = new Analyzer(interpreter) - analyzer.analyze(classInternalName, methodNode) - def frameAt(instruction: AbstractInsnNode): Frame[V] = analyzer.frameAt(instruction, methodNode) - } - implicit class AnalyzerExtensions[V <: Value](val analyzer: Analyzer[V]) extends AnyVal { def frameAt(instruction: AbstractInsnNode, methodNode: MethodNode): Frame[V] = analyzer.getFrames()(methodNode.instructions.indexOf(instruction)) } diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala index 96455c0e38..b9788c3f56 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala @@ -7,104 +7,92 @@ package scala.tools.nsc package backend.jvm package opt +import scala.collection.immutable.IntMap import scala.reflect.internal.util.{NoPosition, Position} -import scala.tools.asm.tree.analysis.{Value, Analyzer, BasicInterpreter} import scala.tools.asm.{Opcodes, Type, Handle} import scala.tools.asm.tree._ -import scala.collection.concurrent +import scala.collection.{concurrent, mutable} import scala.collection.convert.decorateAsScala._ import scala.tools.nsc.backend.jvm.BTypes.InternalName import scala.tools.nsc.backend.jvm.BackendReporting._ -import scala.tools.nsc.backend.jvm.analysis.{NotNull, NullnessAnalyzer} +import scala.tools.nsc.backend.jvm.analysis._ import ByteCodeRepository.{Source, CompilationUnit} import BytecodeUtils._ class CallGraph[BT <: BTypes](val btypes: BT) { import btypes._ + import analyzers._ - val callsites: concurrent.Map[MethodInsnNode, Callsite] = recordPerRunCache(concurrent.TrieMap.empty) - - val closureInstantiations: concurrent.Map[InvokeDynamicInsnNode, ClosureInstantiation] = recordPerRunCache(concurrent.TrieMap.empty) - - def 
addClass(classNode: ClassNode): Unit = { - val classType = classBTypeFromClassNode(classNode) - for { - m <- classNode.methods.asScala - (calls, closureInits) = analyzeCallsites(m, classType) - } { - calls foreach (callsite => callsites(callsite.callsiteInstruction) = callsite) - closureInits foreach (lmf => closureInstantiations(lmf.indy) = ClosureInstantiation(lmf, m, classType)) - } - } + /** + * The call graph contains the callsites in the program being compiled. + * + * Indexing the call graph by the containing MethodNode and the invocation MethodInsnNode allows + * finding callsites efficiently. For example, an inlining heuristic might want to know all + * callsites withing a callee method. + * + * Note that the call graph is not guaranteed to be complete: callsites may be missing. In + * particular, if a method is very large, all of its callsites might not be in the hash map. + * The reason is that adding a method to the call graph requires running an ASM analyzer, which + * can be too slow. + * + * Note that call graph entries (Callsite instances) keep a reference to the invocation + * MethodInsnNode, which keeps all AbstractInsnNodes of the method reachable. Adding classes + * from the classpath to the call graph (in addition to classes being compiled) may prevent + * method instruction nodes from being GCd. The ByteCodeRepository has a fixed size cache for + * parsed ClassNodes - keeping all ClassNodes alive consumed too much memory. + * The call graph is less problematic because only methods being called are kept alive, not entire + * classes. But we should keep an eye on this. + */ + val callsites: mutable.Map[MethodNode, Map[MethodInsnNode, Callsite]] = recordPerRunCache(concurrent.TrieMap.empty withDefaultValue Map.empty) /** - * Returns a list of callsites in the method, plus a list of closure instantiation indy instructions. + * Closure instantiations in the program being compiled. 
+ * + * Indexing closure instantiations by the containing MethodNode is beneficial for the closure + * optimizer: finding callsites to re-write requires running a producers-consumers analysis on + * the method. Here the closure instantiations are already grouped by method. */ - def analyzeCallsites(methodNode: MethodNode, definingClass: ClassBType): (List[Callsite], List[LambdaMetaFactoryCall]) = { + val closureInstantiations: mutable.Map[MethodNode, Map[InvokeDynamicInsnNode, ClosureInstantiation]] = recordPerRunCache(concurrent.TrieMap.empty withDefaultValue Map.empty) + + def removeCallsite(invocation: MethodInsnNode, methodNode: MethodNode): Option[Callsite] = { + val methodCallsites = callsites(methodNode) + val newCallsites = methodCallsites - invocation + if (newCallsites.isEmpty) callsites.remove(methodNode) + else callsites(methodNode) = newCallsites + methodCallsites.get(invocation) + } - case class CallsiteInfo(safeToInline: Boolean, safeToRewrite: Boolean, - annotatedInline: Boolean, annotatedNoInline: Boolean, - warning: Option[CalleeInfoWarning]) + def addCallsite(callsite: Callsite): Unit = { + val methodCallsites = callsites(callsite.callsiteMethod) + callsites(callsite.callsiteMethod) = methodCallsites + (callsite.callsiteInstruction -> callsite) + } - /** - * Analyze a callsite and gather meta-data that can be used for inlining decisions. - */ - def analyzeCallsite(calleeMethodNode: MethodNode, calleeDeclarationClassBType: ClassBType, receiverTypeInternalName: InternalName, calleeSource: Source): CallsiteInfo = { - val methodSignature = calleeMethodNode.name + calleeMethodNode.desc - - try { - // The inlineInfo.methodInfos of a ClassBType holds an InlineInfo for each method *declared* - // within a class (not for inherited methods). Since we already have the classBType of the - // callee, we only check there for the methodInlineInfo, we should find it there. 
- calleeDeclarationClassBType.info.orThrow.inlineInfo.methodInfos.get(methodSignature) match { - case Some(methodInlineInfo) => - val canInlineFromSource = compilerSettings.YoptInlineGlobal || calleeSource == CompilationUnit - - val isAbstract = BytecodeUtils.isAbstractMethod(calleeMethodNode) - - // (1) A non-final method can be safe to inline if the receiver type is a final subclass. Example: - // class A { @inline def f = 1 }; object B extends A; B.f // can be inlined - // - // TODO: type analysis can render more calls statically resolved. Example: - // new A.f // can be inlined, the receiver type is known to be exactly A. - val isStaticallyResolved: Boolean = { - methodInlineInfo.effectivelyFinal || - classBTypeFromParsedClassfile(receiverTypeInternalName).info.orThrow.inlineInfo.isEffectivelyFinal // (1) - } + def removeClosureInstantiation(indy: InvokeDynamicInsnNode, methodNode: MethodNode): Option[ClosureInstantiation] = { + val methodClosureInits = closureInstantiations(methodNode) + val newClosureInits = methodClosureInits - indy + if (newClosureInits.isEmpty) closureInstantiations.remove(methodNode) + else closureInstantiations(methodNode) = newClosureInits + methodClosureInits.get(indy) + } - val isRewritableTraitCall = isStaticallyResolved && methodInlineInfo.traitMethodWithStaticImplementation + def addClosureInstantiation(closureInit: ClosureInstantiation) = { + val methodClosureInits = closureInstantiations(closureInit.ownerMethod) + closureInstantiations(closureInit.ownerMethod) = methodClosureInits + (closureInit.lambdaMetaFactoryCall.indy -> closureInit) + } - val warning = calleeDeclarationClassBType.info.orThrow.inlineInfo.warning.map( - MethodInlineInfoIncomplete(calleeDeclarationClassBType.internalName, calleeMethodNode.name, calleeMethodNode.desc, _)) + def addClass(classNode: ClassNode): Unit = { + val classType = classBTypeFromClassNode(classNode) + classNode.methods.asScala.foreach(addMethod(_, classType)) + } - // (1) For invocations of 
final trait methods, the callee isStaticallyResolved but also - // abstract. Such a callee is not safe to inline - it needs to be re-written to the - // static impl method first (safeToRewrite). - // (2) Final trait methods can be rewritten from the interface to the static implementation - // method to enable inlining. - CallsiteInfo( - safeToInline = - canInlineFromSource && - isStaticallyResolved && // (1) - !isAbstract && - !BytecodeUtils.isConstructor(calleeMethodNode) && - !BytecodeUtils.isNativeMethod(calleeMethodNode), - safeToRewrite = canInlineFromSource && isRewritableTraitCall, // (2) - annotatedInline = methodInlineInfo.annotatedInline, - annotatedNoInline = methodInlineInfo.annotatedNoInline, - warning = warning) - - case None => - val warning = MethodInlineInfoMissing(calleeDeclarationClassBType.internalName, calleeMethodNode.name, calleeMethodNode.desc, calleeDeclarationClassBType.info.orThrow.inlineInfo.warning) - CallsiteInfo(false, false, false, false, Some(warning)) - } - } catch { - case Invalid(noInfo: NoClassBTypeInfo) => - val warning = MethodInlineInfoError(calleeDeclarationClassBType.internalName, calleeMethodNode.name, calleeMethodNode.desc, noInfo) - CallsiteInfo(false, false, false, false, Some(warning)) - } - } + def addIfMissing(methodNode: MethodNode, definingClass: ClassBType): Unit = { + if (!callsites.contains(methodNode)) addMethod(methodNode, definingClass) + } + /** + * Returns a list of callsites in the method, plus a list of closure instantiation indy instructions. + */ + def addMethod(methodNode: MethodNode, definingClass: ClassBType): Unit = { // TODO: run dataflow analyses to make the call graph more precise // - producers to get forwarded parameters (ForwardedParam) // - typeAnalysis for more precise argument types, more precise callee @@ -112,77 +100,236 @@ class CallGraph[BT <: BTypes](val btypes: BT) { // For now we run a NullnessAnalyzer. 
It is used to determine if the receiver of an instance // call is known to be not-null, in which case we don't have to emit a null check when inlining. // It is also used to get the stack height at the call site. - localOpt.minimalRemoveUnreachableCode(methodNode, definingClass.internalName) - val analyzer: Analyzer[_ <: Value] = { - if (compilerSettings.YoptNullnessTracking) new NullnessAnalyzer - else new Analyzer(new BasicInterpreter) + val analyzer = { + if (compilerSettings.YoptNullnessTracking && AsmAnalyzer.sizeOKForNullness(methodNode)) { + Some(new AsmAnalyzer(methodNode, definingClass.internalName, new NullnessAnalyzer)) + } else if (AsmAnalyzer.sizeOKForBasicValue(methodNode)) { + Some(new AsmAnalyzer(methodNode, definingClass.internalName)) + } else None } - analyzer.analyze(definingClass.internalName, methodNode) - def receiverNotNullByAnalysis(call: MethodInsnNode, numArgs: Int) = analyzer match { - case nullnessAnalyzer: NullnessAnalyzer => - val frame = nullnessAnalyzer.frameAt(call, methodNode) - frame.getStack(frame.getStackSize - 1 - numArgs).nullness == NotNull + // if the method is too large to run an analyzer, it is not added to the call graph + if (analyzer.nonEmpty) { + val Some(a) = analyzer + def receiverNotNullByAnalysis(call: MethodInsnNode, numArgs: Int) = a.analyzer match { + case nullnessAnalyzer: NullnessAnalyzer => + val frame = nullnessAnalyzer.frameAt(call, methodNode) + frame.getStack(frame.getStackSize - 1 - numArgs) eq NotNullValue + case _ => false + } + + var methodCallsites = Map.empty[MethodInsnNode, Callsite] + var methodClosureInstantiations = Map.empty[InvokeDynamicInsnNode, ClosureInstantiation] + + // lazy so it is only computed if actually used by computeArgInfos + lazy val prodCons = new ProdConsAnalyzer(methodNode, definingClass.internalName) + + methodNode.instructions.iterator.asScala foreach { + case call: MethodInsnNode if a.frameAt(call) != null => // skips over unreachable code + val callee: 
Either[OptimizerWarning, Callee] = for { + (method, declarationClass) <- byteCodeRepository.methodNode(call.owner, call.name, call.desc): Either[OptimizerWarning, (MethodNode, InternalName)] + (declarationClassNode, source) <- byteCodeRepository.classNodeAndSource(declarationClass): Either[OptimizerWarning, (ClassNode, Source)] + declarationClassBType = classBTypeFromClassNode(declarationClassNode) + } yield { + val CallsiteInfo(safeToInline, safeToRewrite, annotatedInline, annotatedNoInline, samParamTypes, warning) = analyzeCallsite(method, declarationClassBType, call.owner, source) + Callee( + callee = method, + calleeDeclarationClass = declarationClassBType, + safeToInline = safeToInline, + safeToRewrite = safeToRewrite, + annotatedInline = annotatedInline, + annotatedNoInline = annotatedNoInline, + samParamTypes = samParamTypes, + calleeInfoWarning = warning) + } - case _ => false + val argInfos = computeArgInfos(callee, call, prodCons) + + val receiverNotNull = call.getOpcode == Opcodes.INVOKESTATIC || { + val numArgs = Type.getArgumentTypes(call.desc).length + receiverNotNullByAnalysis(call, numArgs) + } + + methodCallsites += call -> Callsite( + callsiteInstruction = call, + callsiteMethod = methodNode, + callsiteClass = definingClass, + callee = callee, + argInfos = argInfos, + callsiteStackHeight = a.frameAt(call).getStackSize, + receiverKnownNotNull = receiverNotNull, + callsitePosition = callsitePositions.getOrElse(call, NoPosition) + ) + + case LambdaMetaFactoryCall(indy, samMethodType, implMethod, instantiatedMethodType) if a.frameAt(indy) != null => + val lmf = LambdaMetaFactoryCall(indy, samMethodType, implMethod, instantiatedMethodType) + val capturedArgInfos = computeCapturedArgInfos(lmf, prodCons) + methodClosureInstantiations += indy -> ClosureInstantiation( + lmf, + methodNode, + definingClass, + capturedArgInfos) + + case _ => + } + + callsites(methodNode) = methodCallsites + closureInstantiations(methodNode) = methodClosureInstantiations } + } 
- val callsites = new collection.mutable.ListBuffer[Callsite] - val closureInstantiations = new collection.mutable.ListBuffer[LambdaMetaFactoryCall] - - methodNode.instructions.iterator.asScala foreach { - case call: MethodInsnNode => - val callee: Either[OptimizerWarning, Callee] = for { - (method, declarationClass) <- byteCodeRepository.methodNode(call.owner, call.name, call.desc): Either[OptimizerWarning, (MethodNode, InternalName)] - (declarationClassNode, source) <- byteCodeRepository.classNodeAndSource(declarationClass): Either[OptimizerWarning, (ClassNode, Source)] - declarationClassBType = classBTypeFromClassNode(declarationClassNode) - } yield { - val CallsiteInfo(safeToInline, safeToRewrite, annotatedInline, annotatedNoInline, warning) = analyzeCallsite(method, declarationClassBType, call.owner, source) - Callee( - callee = method, - calleeDeclarationClass = declarationClassBType, - safeToInline = safeToInline, - safeToRewrite = safeToRewrite, - annotatedInline = annotatedInline, - annotatedNoInline = annotatedNoInline, - calleeInfoWarning = warning) - } + def computeArgInfos(callee: Either[OptimizerWarning, Callee], callsiteInsn: MethodInsnNode, prodCons: => ProdConsAnalyzer): IntMap[ArgInfo] = { + if (callee.isLeft) IntMap.empty + else { + lazy val numArgs = Type.getArgumentTypes(callsiteInsn.desc).length + (if (callsiteInsn.getOpcode == Opcodes.INVOKESTATIC) 0 else 1) + argInfosForSams(callee.get.samParamTypes, callsiteInsn, numArgs, prodCons) + } + } - val argInfos = if (callee.isLeft) Nil else { - // TODO: for now it's Nil, because we don't run any data flow analysis - // there's no point in using the parameter types, that doesn't add any information. 
- // NOTE: need to run the same analyses after inlining, to re-compute the argInfos for the - // new duplicated callsites, see Inliner.inline - Nil - } + def computeCapturedArgInfos(lmf: LambdaMetaFactoryCall, prodCons: => ProdConsAnalyzer): IntMap[ArgInfo] = { + val capturedSams = capturedSamTypes(lmf) + val numCaptures = Type.getArgumentTypes(lmf.indy.desc).length + argInfosForSams(capturedSams, lmf.indy, numCaptures, prodCons) + } - val receiverNotNull = call.getOpcode == Opcodes.INVOKESTATIC || { - val numArgs = Type.getArgumentTypes(call.desc).length - receiverNotNullByAnalysis(call, numArgs) + private def argInfosForSams(sams: IntMap[ClassBType], consumerInsn: AbstractInsnNode, numConsumed: => Int, prodCons: => ProdConsAnalyzer): IntMap[ArgInfo] = { + // TODO: use type analysis instead of ProdCons - should be more efficient + // some random thoughts: + // - assign special types to parameters and indy-lambda-functions to track them + // - upcast should not change type flow analysis: don't lose information. + // - can we do something about factory calls? Foo(x) for case class foo gives a Foo. + // inline the factory? analysis across method boundary? 
+ + // assign to a lazy val to prevent repeated evaluation of the by-name arg + lazy val prodConsI = prodCons + lazy val firstConsumedSlot = { + val consumerFrame = prodConsI.frameAt(consumerInsn) + consumerFrame.stackTop - numConsumed + 1 + } + sams flatMap { + case (index, _) => + val prods = prodConsI.initialProducersForValueAt(consumerInsn, firstConsumedSlot + index) + if (prods.size != 1) None + else { + val argInfo = prods.head match { + case LambdaMetaFactoryCall(_, _, _, _) => Some(FunctionLiteral) + case ParameterProducer(local) => Some(ForwardedParam(local)) + case _ => None + } + argInfo.map((index, _)) } + } + } - callsites += Callsite( - callsiteInstruction = call, - callsiteMethod = methodNode, - callsiteClass = definingClass, - callee = callee, - argInfos = argInfos, - callsiteStackHeight = analyzer.frameAt(call, methodNode).getStackSize, - receiverKnownNotNull = receiverNotNull, - callsitePosition = callsitePositions.getOrElse(call, NoPosition) - ) - - case LambdaMetaFactoryCall(indy, samMethodType, implMethod, instantiatedMethodType) => - closureInstantiations += LambdaMetaFactoryCall(indy, samMethodType, implMethod, instantiatedMethodType) - - case _ => + def samParamTypes(methodNode: MethodNode, receiverType: ClassBType): IntMap[ClassBType] = { + val paramTypes = { + val params = Type.getMethodType(methodNode.desc).getArgumentTypes.map(t => bTypeForDescriptorOrInternalNameFromClassfile(t.getDescriptor)) + val isStatic = BytecodeUtils.isStaticMethod(methodNode) + if (isStatic) params else receiverType +: params } + samTypes(paramTypes) + } - (callsites.toList, closureInstantiations.toList) + def capturedSamTypes(lmf: LambdaMetaFactoryCall): IntMap[ClassBType] = { + val capturedTypes = Type.getArgumentTypes(lmf.indy.desc).map(t => bTypeForDescriptorOrInternalNameFromClassfile(t.getDescriptor)) + samTypes(capturedTypes) + } + + private def samTypes(types: Array[BType]): IntMap[ClassBType] = { + var res = IntMap.empty[ClassBType] + for (i <- 
types.indices) { + types(i) match { + case c: ClassBType => + if (c.info.get.inlineInfo.sam.isDefined) res = res.updated(i, c) + + case _ => + } + } + res } /** + * Just a named tuple used as return type of `analyzeCallsite`. + */ + private case class CallsiteInfo(safeToInline: Boolean, safeToRewrite: Boolean, + annotatedInline: Boolean, annotatedNoInline: Boolean, + samParamTypes: IntMap[ClassBType], + warning: Option[CalleeInfoWarning]) + + /** + * Analyze a callsite and gather meta-data that can be used for inlining decisions. + */ + private def analyzeCallsite(calleeMethodNode: MethodNode, calleeDeclarationClassBType: ClassBType, receiverTypeInternalName: InternalName, calleeSource: Source): CallsiteInfo = { + val methodSignature = calleeMethodNode.name + calleeMethodNode.desc + + try { + // The inlineInfo.methodInfos of a ClassBType holds an InlineInfo for each method *declared* + // within a class (not for inherited methods). Since we already have the classBType of the + // callee, we only check there for the methodInlineInfo, we should find it there. + calleeDeclarationClassBType.info.orThrow.inlineInfo.methodInfos.get(methodSignature) match { + case Some(methodInlineInfo) => + val canInlineFromSource = compilerSettings.YoptInlineGlobal || calleeSource == CompilationUnit + + val isAbstract = BytecodeUtils.isAbstractMethod(calleeMethodNode) + + val receiverType = classBTypeFromParsedClassfile(receiverTypeInternalName) + // (1) A non-final method can be safe to inline if the receiver type is a final subclass. Example: + // class A { @inline def f = 1 }; object B extends A; B.f // can be inlined + // + // TODO: (1) doesn't cover the following example: + // trait TravLike { def map = ... } + // sealed trait List extends TravLike { ... 
 } // assume map is not overridden + final case class :: / final case object Nil + (l: List).map // can be inlined + we need to know that + - the receiver is sealed + - what are the children of the receiver + - all children are final + - none of the children overrides map + // + // TODO: type analysis can render more calls statically resolved. Example: + // new A.f // can be inlined, the receiver type is known to be exactly A. + val isStaticallyResolved: Boolean = { + methodInlineInfo.effectivelyFinal || + receiverType.info.orThrow.inlineInfo.isEffectivelyFinal // (1) + } + + val isRewritableTraitCall = isStaticallyResolved && methodInlineInfo.traitMethodWithStaticImplementation + + val warning = calleeDeclarationClassBType.info.orThrow.inlineInfo.warning.map( + MethodInlineInfoIncomplete(calleeDeclarationClassBType.internalName, calleeMethodNode.name, calleeMethodNode.desc, _)) + + // (1) For invocations of final trait methods, the callee isStaticallyResolved but also + // abstract. Such a callee is not safe to inline - it needs to be re-written to the + // static impl method first (safeToRewrite). + // (2) Final trait methods can be rewritten from the interface to the static implementation + // method to enable inlining. 
+ CallsiteInfo( + safeToInline = + canInlineFromSource && + isStaticallyResolved && // (1) + !isAbstract && + !BytecodeUtils.isConstructor(calleeMethodNode) && + !BytecodeUtils.isNativeMethod(calleeMethodNode), + safeToRewrite = canInlineFromSource && isRewritableTraitCall, // (2) + annotatedInline = methodInlineInfo.annotatedInline, + annotatedNoInline = methodInlineInfo.annotatedNoInline, + samParamTypes = samParamTypes(calleeMethodNode, receiverType), + warning = warning) + + case None => + val warning = MethodInlineInfoMissing(calleeDeclarationClassBType.internalName, calleeMethodNode.name, calleeMethodNode.desc, calleeDeclarationClassBType.info.orThrow.inlineInfo.warning) + CallsiteInfo(false, false, false, false, IntMap.empty, Some(warning)) + } + } catch { + case Invalid(noInfo: NoClassBTypeInfo) => + val warning = MethodInlineInfoError(calleeDeclarationClassBType.internalName, calleeMethodNode.name, calleeMethodNode.desc, noInfo) + CallsiteInfo(false, false, false, false, IntMap.empty, Some(warning)) + } + } + + /** * A callsite in the call graph. * * @param callsiteInstruction The invocation instruction @@ -197,7 +344,7 @@ class CallGraph[BT <: BTypes](val btypes: BT) { * @param callsitePosition The source position of the callsite, used for inliner warnings. */ final case class Callsite(callsiteInstruction: MethodInsnNode, callsiteMethod: MethodNode, callsiteClass: ClassBType, - callee: Either[OptimizerWarning, Callee], argInfos: List[ArgInfo], + callee: Either[OptimizerWarning, Callee], argInfos: IntMap[ArgInfo], callsiteStackHeight: Int, receiverKnownNotNull: Boolean, callsitePosition: Position) { override def toString = "Invocation of" + @@ -210,8 +357,9 @@ class CallGraph[BT <: BTypes](val btypes: BT) { * Information about invocation arguments, obtained through data flow analysis of the callsite method. 
*/ sealed trait ArgInfo - final case class ArgTypeInfo(argType: BType, isPrecise: Boolean, knownNotNull: Boolean) extends ArgInfo + case object FunctionLiteral extends ArgInfo final case class ForwardedParam(index: Int) extends ArgInfo + // final case class ArgTypeInfo(argType: BType, isPrecise: Boolean, knownNotNull: Boolean) extends ArgInfo // can be extended, e.g., with constant types /** @@ -227,17 +375,29 @@ class CallGraph[BT <: BTypes](val btypes: BT) { * that can be safely re-written to the static implementation method. * @param annotatedInline True if the callee is annotated @inline * @param annotatedNoInline True if the callee is annotated @noinline + * @param samParamTypes A map from parameter positions to SAM parameter types * @param calleeInfoWarning An inliner warning if some information was not available while * gathering the information about this callee. */ final case class Callee(callee: MethodNode, calleeDeclarationClass: ClassBType, safeToInline: Boolean, safeToRewrite: Boolean, annotatedInline: Boolean, annotatedNoInline: Boolean, + samParamTypes: IntMap[ClassBType], calleeInfoWarning: Option[CalleeInfoWarning]) { assert(!(safeToInline && safeToRewrite), s"A callee of ${callee.name} can be either safeToInline or safeToRewrite, but not both.") + override def toString = s"Callee($calleeDeclarationClass.${callee.name})" } - final case class ClosureInstantiation(lambdaMetaFactoryCall: LambdaMetaFactoryCall, ownerMethod: MethodNode, ownerClass: ClassBType) { + /** + * Metadata about a closure instantiation, stored in the call graph + * + * @param lambdaMetaFactoryCall the InvokeDynamic instruction + * @param ownerMethod the method where the closure is allocated + * @param ownerClass the class containing the above method + * @param capturedArgInfos information about captured arguments. Used for updating the call + * graph when re-writing a closure invocation to the body method. 
+ */ + final case class ClosureInstantiation(lambdaMetaFactoryCall: LambdaMetaFactoryCall, ownerMethod: MethodNode, ownerClass: ClassBType, capturedArgInfos: IntMap[ArgInfo]) { override def toString = s"ClosureInstantiation($lambdaMetaFactoryCall, ${ownerMethod.name + ownerMethod.desc}, $ownerClass)" } final case class LambdaMetaFactoryCall(indy: InvokeDynamicInsnNode, samMethodType: Type, implMethod: Handle, instantiatedMethodType: Type) diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala index b0dc6ead1b..fb7dd16909 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala @@ -9,11 +9,11 @@ package opt import scala.annotation.switch import scala.collection.immutable +import scala.collection.immutable.IntMap import scala.reflect.internal.util.NoPosition import scala.tools.asm.{Type, Opcodes} import scala.tools.asm.tree._ import scala.tools.nsc.backend.jvm.BTypes.InternalName -import scala.tools.nsc.backend.jvm.analysis.ProdConsAnalyzer import BytecodeUtils._ import BackendReporting._ import Opcodes._ @@ -23,6 +23,7 @@ import scala.collection.convert.decorateAsScala._ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { import btypes._ import callGraph._ + import analyzers._ /** * If a closure is allocated and invoked within the same method, re-write the invocation to the @@ -70,24 +71,19 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { } } - // Grouping the closure instantiations by method allows running the ProdConsAnalyzer only once per - // method. Also sort the instantiations: If there are multiple closure instantiations in a method, - // closure invocations need to be re-written in a consistent order for bytecode stability. The local - // variable slots for storing captured values depends on the order of rewriting. 
- val closureInstantiationsByMethod: Map[MethodNode, immutable.TreeSet[ClosureInstantiation]] = { - closureInstantiations.values.groupBy(_.ownerMethod).mapValues(immutable.TreeSet.empty ++ _) - } - // For each closure instantiation, a list of callsites of the closure that can be re-written // If a callsite cannot be rewritten, for example because the lambda body method is not accessible, // a warning is returned instead. val callsitesToRewrite: List[(ClosureInstantiation, List[Either[RewriteClosureApplyToClosureBodyFailed, (MethodInsnNode, Int)]])] = { - closureInstantiationsByMethod.iterator.flatMap({ + closureInstantiations.iterator.flatMap({ case (methodNode, closureInits) => // A lazy val to ensure the analysis only runs if necessary (the value is passed by name to `closureCallsites`) - lazy val prodCons = new ProdConsAnalyzer(methodNode, closureInits.head.ownerClass.internalName) - closureInits.iterator.map(init => (init, closureCallsites(init, prodCons))) - }).toList // mapping to a list (not a map) to keep the sorting of closureInstantiationsByMethod + // We don't need to worry about the method being too large for running an analysis: large + // methods are not added to the call graph / closureInstantiations map. 
+ lazy val prodCons = new ProdConsAnalyzer(methodNode, closureInits.valuesIterator.next().ownerClass.internalName) + val sortedInits = immutable.TreeSet.empty ++ closureInits.values + sortedInits.iterator.map(init => (init, closureCallsites(init, prodCons))).filter(_._2.nonEmpty) + }).toList // mapping to a list (not a map) to keep the sorting } // Rewrite all closure callsites (or issue inliner warnings for those that cannot be rewritten) @@ -162,7 +158,7 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { isAccessible } - def pos = callGraph.callsites.get(invocation).map(_.callsitePosition).getOrElse(NoPosition) + def pos = callGraph.callsites(ownerMethod).get(invocation).map(_.callsitePosition).getOrElse(NoPosition) val stackSize: Either[RewriteClosureApplyToClosureBodyFailed, Int] = bodyAccessible match { case Left(w) => Left(RewriteClosureAccessCheckFailed(pos, w)) case Right(false) => Left(RewriteClosureIllegalAccess(pos, ownerClass.internalName)) @@ -210,8 +206,9 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { insertLoadOps(invocation, ownerMethod, argumentLocalsList) // update maxStack - val capturesStackSize = localsForCapturedValues.size - val invocationStackHeight = stackHeight + capturesStackSize - 1 // -1 because the closure is gone + // One slot per value is correct for long / double, see comment in the `analysis` package object. 
+ val numCapturedValues = localsForCapturedValues.locals.length + val invocationStackHeight = stackHeight + numCapturedValues - 1 // -1 because the closure is gone if (invocationStackHeight > ownerMethod.maxStack) ownerMethod.maxStack = invocationStackHeight @@ -237,26 +234,33 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { ownerMethod.instructions.remove(invocation) // update the call graph - val originalCallsite = callGraph.callsites.remove(invocation) + val originalCallsite = callGraph.removeCallsite(invocation, ownerMethod) // the method node is needed for building the call graph entry val bodyMethod = byteCodeRepository.methodNode(lambdaBodyHandle.getOwner, lambdaBodyHandle.getName, lambdaBodyHandle.getDesc) def bodyMethodIsBeingCompiled = byteCodeRepository.classNodeAndSource(lambdaBodyHandle.getOwner).map(_._2 == CompilationUnit).getOrElse(false) - val bodyMethodCallsite = Callsite( - callsiteInstruction = bodyInvocation, - callsiteMethod = ownerMethod, - callsiteClass = closureInit.ownerClass, - callee = bodyMethod.map({ - case (bodyMethodNode, bodyMethodDeclClass) => Callee( + val callee = bodyMethod.map({ + case (bodyMethodNode, bodyMethodDeclClass) => + val bodyDeclClassType = classBTypeFromParsedClassfile(bodyMethodDeclClass) + Callee( callee = bodyMethodNode, - calleeDeclarationClass = classBTypeFromParsedClassfile(bodyMethodDeclClass), + calleeDeclarationClass = bodyDeclClassType, safeToInline = compilerSettings.YoptInlineGlobal || bodyMethodIsBeingCompiled, safeToRewrite = false, // the lambda body method is not a trait interface method annotatedInline = false, annotatedNoInline = false, + samParamTypes = callGraph.samParamTypes(bodyMethodNode, bodyDeclClassType), calleeInfoWarning = None) - }), - argInfos = Nil, + }) + val argInfos = closureInit.capturedArgInfos ++ originalCallsite.map(cs => cs.argInfos map { + case (index, info) => (index + numCapturedValues, info) + }).getOrElse(IntMap.empty) + val bodyMethodCallsite = Callsite( + 
callsiteInstruction = bodyInvocation, + callsiteMethod = ownerMethod, + callsiteClass = closureInit.ownerClass, + callee = callee, + argInfos = argInfos, callsiteStackHeight = invocationStackHeight, receiverKnownNotNull = true, // see below (*) callsitePosition = originalCallsite.map(_.callsitePosition).getOrElse(NoPosition) @@ -266,7 +270,11 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { // (corresponding to the receiver) must be non-null" // Explanation: If the lambda body method is non-static, the receiver is a captured // value. It can only be captured within some instance method, so we know it's non-null. - callGraph.callsites(bodyInvocation) = bodyMethodCallsite + callGraph.addCallsite(bodyMethodCallsite) + + // Rewriting a closure invocation may render code unreachable. For example, the body method of + // (x: T) => ??? has return type Nothing$, and an ATHROW is added (see fixLoadedNothingOrNullValue). + unreachableCodeEliminated -= ownerMethod } /** diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/InlineInfoAttribute.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/InlineInfoAttribute.scala index e7dd5abc57..c885a29e16 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/InlineInfoAttribute.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/InlineInfoAttribute.scala @@ -47,15 +47,22 @@ case class InlineInfoAttribute(inlineInfo: InlineInfo) extends Attribute(InlineI result.putByte(InlineInfoAttribute.VERSION) - var hasSelfIsFinal = 0 - if (inlineInfo.isEffectivelyFinal) hasSelfIsFinal |= 1 - if (inlineInfo.traitImplClassSelfType.isDefined) hasSelfIsFinal |= 2 - result.putByte(hasSelfIsFinal) + var finalSelfSam = 0 + if (inlineInfo.isEffectivelyFinal) finalSelfSam |= 1 + if (inlineInfo.traitImplClassSelfType.isDefined) finalSelfSam |= 2 + if (inlineInfo.sam.isDefined) finalSelfSam |= 4 + result.putByte(finalSelfSam) for (selfInternalName <- inlineInfo.traitImplClassSelfType) { result.putShort(cw.newUTF8(selfInternalName)) } 
+ for (samNameDesc <- inlineInfo.sam) { + val (name, desc) = samNameDesc.span(_ != '(') + result.putShort(cw.newUTF8(name)) + result.putShort(cw.newUTF8(desc)) + } + // The method count fits in a short (the methods_count in a classfile is also a short) result.putShort(inlineInfo.methodInfos.size) @@ -94,15 +101,20 @@ case class InlineInfoAttribute(inlineInfo: InlineInfo) extends Attribute(InlineI val version = nextByte() if (version == 1) { - val hasSelfIsFinal = nextByte() - val isFinal = (hasSelfIsFinal & 1) != 0 - val hasSelf = (hasSelfIsFinal & 2) != 0 + val finalSelfSam = nextByte() + val isFinal = (finalSelfSam & 1) != 0 + val hasSelf = (finalSelfSam & 2) != 0 + val hasSam = (finalSelfSam & 4) != 0 - val self = if (hasSelf) { + val self = if (!hasSelf) None else { val selfName = nextUTF8() Some(selfName) - } else { - None + } + + val sam = if (!hasSam) None else { + val name = nextUTF8() + val desc = nextUTF8() + Some(name + desc) } val numEntries = nextShort() @@ -118,7 +130,7 @@ case class InlineInfoAttribute(inlineInfo: InlineInfo) extends Attribute(InlineI (name + desc, MethodInlineInfo(isFinal, traitMethodWithStaticImplementation, isInline, isNoInline)) }).toMap - InlineInfoAttribute(InlineInfo(self, isFinal, infos, None)) + InlineInfoAttribute(InlineInfo(self, isFinal, sam, infos, None)) } else { val msg = UnknownScalaInlineInfoVersion(cr.getClassName, version) InlineInfoAttribute(BTypes.EmptyInlineInfo.copy(warning = Some(msg))) @@ -129,8 +141,10 @@ case class InlineInfoAttribute(inlineInfo: InlineInfo) extends Attribute(InlineI object InlineInfoAttribute { /** * [u1] version - * [u1] isEffectivelyFinal (<< 0), hasTraitImplClassSelfType (<< 1) + * [u1] isEffectivelyFinal (<< 0), hasTraitImplClassSelfType (<< 1), hasSam (<< 2) * [u2]? traitImplClassSelfType (reference) + * [u2]? samName (reference) + * [u2]? 
samDescriptor (reference) * [u2] numMethodEntries * [u2] name (reference) * [u2] descriptor (reference) @@ -145,4 +159,4 @@ object InlineInfoAttribute { * In order to instruct the ASM framework to de-serialize the ScalaInlineInfo attribute, we need * to pass a prototype instance when running the class reader. */ -object InlineInfoAttributePrototype extends InlineInfoAttribute(InlineInfo(null, false, null, null)) +object InlineInfoAttributePrototype extends InlineInfoAttribute(InlineInfo(null, false, null, null, null)) diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala index 6b2786c1a3..baa747492f 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala @@ -9,7 +9,6 @@ package opt import scala.annotation.tailrec import scala.tools.asm -import asm.Handle import asm.Opcodes._ import asm.tree._ import scala.collection.convert.decorateAsScala._ @@ -17,18 +16,20 @@ import scala.collection.convert.decorateAsJava._ import AsmUtils._ import BytecodeUtils._ import collection.mutable -import scala.tools.asm.tree.analysis.SourceInterpreter +import scala.tools.asm.tree.analysis.{Analyzer, SourceInterpreter} import BackendReporting._ import scala.tools.nsc.backend.jvm.BTypes.InternalName class Inliner[BT <: BTypes](val btypes: BT) { import btypes._ import callGraph._ + import inlinerHeuristics._ + import analyzers._ def eliminateUnreachableCodeAndUpdateCallGraph(methodNode: MethodNode, definingClass: InternalName): Unit = { localOpt.minimalRemoveUnreachableCode(methodNode, definingClass) foreach { - case invocation: MethodInsnNode => callGraph.callsites.remove(invocation) - case indy: InvokeDynamicInsnNode => callGraph.closureInstantiations.remove(indy) + case invocation: MethodInsnNode => callGraph.removeCallsite(invocation, methodNode) + case indy: InvokeDynamicInsnNode => callGraph.removeClosureInstantiation(indy, 
methodNode) case _ => } } @@ -37,7 +38,8 @@ class Inliner[BT <: BTypes](val btypes: BT) { rewriteFinalTraitMethodInvocations() for (request <- collectAndOrderInlineRequests) { - val Right(callee) = request.callee // collectAndOrderInlineRequests returns callsites with a known callee + val callsite = request.callsite + val Right(callee) = callsite.callee // collectAndOrderInlineRequests returns callsites with a known callee // Inlining a method can create unreachable code. Example: // def f = throw e @@ -48,16 +50,14 @@ class Inliner[BT <: BTypes](val btypes: BT) { // DCE above removes unreachable callsites from the call graph. If the inlining request denotes // such an eliminated callsite, do nothing. - if (callGraph.callsites contains request.callsiteInstruction) { - val r = inline(request.callsiteInstruction, request.callsiteStackHeight, request.callsiteMethod, request.callsiteClass, - callee.callee, callee.calleeDeclarationClass, - request.receiverKnownNotNull, keepLineNumbers = false) + if (callGraph.callsites(callsite.callsiteMethod).contains(callsite.callsiteInstruction)) { + val warnings = inline(request) - for (warning <- r) { + for (warning <- warnings) { if ((callee.annotatedInline && btypes.compilerSettings.YoptWarningEmitAtInlineFailed) || warning.emitWarning(compilerSettings)) { val annotWarn = if (callee.annotatedInline) " is annotated @inline but" else "" val msg = s"${BackendReporting.methodSignature(callee.calleeDeclarationClass.internalName, callee.callee)}$annotWarn could not be inlined:\n$warning" - backendReporting.inlinerWarning(request.callsitePosition, msg) + backendReporting.inlinerWarning(callsite.callsitePosition, msg) } } } @@ -69,81 +69,36 @@ class Inliner[BT <: BTypes](val btypes: BT) { * - Always remove the same request when breaking inlining cycles * - Perform inlinings in a consistent order */ - object callsiteOrdering extends Ordering[Callsite] { - override def compare(x: Callsite, y: Callsite): Int = { - val cls = 
x.callsiteClass.internalName compareTo y.callsiteClass.internalName + object callsiteOrdering extends Ordering[InlineRequest] { + override def compare(x: InlineRequest, y: InlineRequest): Int = { + val xCs = x.callsite + val yCs = y.callsite + val cls = xCs.callsiteClass.internalName compareTo yCs.callsiteClass.internalName if (cls != 0) return cls - val name = x.callsiteMethod.name compareTo y.callsiteMethod.name + val name = xCs.callsiteMethod.name compareTo yCs.callsiteMethod.name if (name != 0) return name - val desc = x.callsiteMethod.desc compareTo y.callsiteMethod.desc + val desc = xCs.callsiteMethod.desc compareTo yCs.callsiteMethod.desc if (desc != 0) return desc def pos(c: Callsite) = c.callsiteMethod.instructions.indexOf(c.callsiteInstruction) - pos(x) - pos(y) + pos(xCs) - pos(yCs) } } - /** - * Select callsites from the call graph that should be inlined. The resulting list of inlining - * requests is allowed to have cycles, and the callsites can appear in any order. - */ - def selectCallsitesForInlining: List[Callsite] = { - callsites.valuesIterator.filter({ - case callsite @ Callsite(_, _, _, Right(Callee(callee, calleeDeclClass, safeToInline, _, annotatedInline, _, warning)), _, _, _, pos) => - val res = doInlineCallsite(callsite) - - if (!res) { - if (annotatedInline && btypes.compilerSettings.YoptWarningEmitAtInlineFailed) { - // if the callsite is annotated @inline, we report an inline warning even if the underlying - // reason is, for example, mixed compilation (which has a separate -Yopt-warning flag). - def initMsg = s"${BackendReporting.methodSignature(calleeDeclClass.internalName, callee)} is annotated @inline but cannot be inlined" - def warnMsg = warning.map(" Possible reason:\n" + _).getOrElse("") - if (doRewriteTraitCallsite(callsite)) - backendReporting.inlinerWarning(pos, s"$initMsg: the trait method call could not be rewritten to the static implementation method." 
+ warnMsg) - else if (!safeToInline) - backendReporting.inlinerWarning(pos, s"$initMsg: the method is not final and may be overridden." + warnMsg) - else - backendReporting.inlinerWarning(pos, s"$initMsg." + warnMsg) - } else if (warning.isDefined && warning.get.emitWarning(compilerSettings)) { - // when annotatedInline is false, and there is some warning, the callsite metadata is possibly incomplete. - backendReporting.inlinerWarning(pos, s"there was a problem determining if method ${callee.name} can be inlined: \n"+ warning.get) - } - } - - res - - case Callsite(ins, _, _, Left(warning), _, _, _, pos) => - if (warning.emitWarning(compilerSettings)) - backendReporting.inlinerWarning(pos, s"failed to determine if ${ins.name} should be inlined:\n$warning") - false - }).toList - } - - /** - * The current inlining heuristics are simple: inline calls to methods annotated @inline. - */ - def doInlineCallsite(callsite: Callsite): Boolean = callsite match { - case Callsite(_, _, _, Right(Callee(callee, calleeDeclClass, safeToInline, _, annotatedInline, _, warning)), _, _, _, pos) => - if (compilerSettings.YoptInlineHeuristics.value == "everything") safeToInline - else annotatedInline && safeToInline - - case _ => false - } - def rewriteFinalTraitMethodInvocations(): Unit = { // Rewriting final trait method callsites to the implementation class enables inlining. // We cannot just iterate over the values of the `callsites` map because the rewrite changes the // map. Therefore we first copy the values to a list. - callsites.values.toList.foreach(rewriteFinalTraitMethodInvocation) + callsites.valuesIterator.flatMap(_.valuesIterator).toList.foreach(rewriteFinalTraitMethodInvocation) } /** * True for statically resolved trait callsites that should be rewritten to the static implementation method. 
*/ def doRewriteTraitCallsite(callsite: Callsite) = callsite.callee match { - case Right(Callee(callee, calleeDeclarationClass, safeToInline, true, annotatedInline, annotatedNoInline, infoWarning)) => true + case Right(Callee(_, _, _, safeToRewrite, _, _, _, _)) => safeToRewrite case _ => false } @@ -156,7 +111,7 @@ class Inliner[BT <: BTypes](val btypes: BT) { */ def rewriteFinalTraitMethodInvocation(callsite: Callsite): Unit = { if (doRewriteTraitCallsite(callsite)) { - val Right(Callee(callee, calleeDeclarationClass, _, _, annotatedInline, annotatedNoInline, infoWarning)) = callsite.callee + val Right(Callee(callee, calleeDeclarationClass, _, _, annotatedInline, annotatedNoInline, samParamTypes, infoWarning)) = callsite.callee val traitMethodArgumentTypes = asm.Type.getArgumentTypes(callee.desc) @@ -188,9 +143,10 @@ class Inliner[BT <: BTypes](val btypes: BT) { // VerifyError. We run a `SourceInterpreter` to find all producer instructions of the // receiver value and add a cast to the self type after each. if (!selfTypeOk) { - // there's no need to run eliminateUnreachableCode here. building the call graph does that - // already, no code can become unreachable in the meantime. - val analyzer = new AsmAnalyzer(callsite.callsiteMethod, callsite.callsiteClass.internalName, new SourceInterpreter) + // We don't need to worry about the method being too large for running an analysis. + // Callsites of large methods are not added to the call graph. 
+ localOpt.minimalRemoveUnreachableCode(callsite.callsiteMethod, callsite.callsiteClass.internalName) + val analyzer = new AsmAnalyzer(callsite.callsiteMethod, callsite.callsiteClass.internalName, new Analyzer(new SourceInterpreter)) val receiverValue = analyzer.frameAt(callsite.callsiteInstruction).peekStack(traitMethodArgumentTypes.length) for (i <- receiverValue.insns.asScala) { val cast = new TypeInsnNode(CHECKCAST, selfParamType.internalName) @@ -202,7 +158,11 @@ class Inliner[BT <: BTypes](val btypes: BT) { callsite.callsiteMethod.instructions.insert(callsite.callsiteInstruction, newCallsiteInstruction) callsite.callsiteMethod.instructions.remove(callsite.callsiteInstruction) - callGraph.callsites.remove(callsite.callsiteInstruction) + callGraph.removeCallsite(callsite.callsiteInstruction, callsite.callsiteMethod) + val staticCallSamParamTypes = { + if (selfParamType.info.get.inlineInfo.sam.isEmpty) samParamTypes - 0 + else samParamTypes.updated(0, selfParamType) + } val staticCallsite = Callsite( callsiteInstruction = newCallsiteInstruction, callsiteMethod = callsite.callsiteMethod, @@ -214,19 +174,20 @@ class Inliner[BT <: BTypes](val btypes: BT) { safeToRewrite = false, annotatedInline = annotatedInline, annotatedNoInline = annotatedNoInline, + samParamTypes = staticCallSamParamTypes, calleeInfoWarning = infoWarning)), - argInfos = Nil, + argInfos = callsite.argInfos, callsiteStackHeight = callsite.callsiteStackHeight, receiverKnownNotNull = callsite.receiverKnownNotNull, callsitePosition = callsite.callsitePosition ) - callGraph.callsites(newCallsiteInstruction) = staticCallsite + callGraph.addCallsite(staticCallsite) } for (warning <- res.left) { val Right(callee) = callsite.callee val newCallee = callee.copy(calleeInfoWarning = Some(RewriteTraitCallToStaticImplMethodFailed(calleeDeclarationClass.internalName, callee.callee.name, callee.callee.desc, warning))) - callGraph.callsites(callsite.callsiteInstruction) = callsite.copy(callee = Right(newCallee)) 
+ callGraph.addCallsite(callsite.copy(callee = Right(newCallee))) } } } @@ -238,15 +199,11 @@ class Inliner[BT <: BTypes](val btypes: BT) { * The resulting list is sorted such that the leaves of the inline request graph are on the left. * Once these leaves are inlined, the successive elements will be leaves, etc. */ - private def collectAndOrderInlineRequests: List[Callsite] = { - val requests = selectCallsitesForInlining + private def collectAndOrderInlineRequests: List[InlineRequest] = { + val requestsByMethod = selectCallsitesForInlining withDefaultValue Set.empty - // This map is an index to look up the inlining requests for a method. The value sets are mutable - // to allow removing elided requests (to break inlining cycles). The map itself is mutable to - // allow efficient building: requests.groupBy would build values as List[Callsite] that need to - // be transformed to mutable sets. - val inlineRequestsForMethod: mutable.Map[MethodNode, mutable.Set[Callsite]] = mutable.HashMap.empty.withDefaultValue(mutable.HashSet.empty) - for (r <- requests) inlineRequestsForMethod.getOrElseUpdate(r.callsiteMethod, mutable.HashSet.empty) += r + val elided = mutable.Set.empty[InlineRequest] + def nonElidedRequests(methodNode: MethodNode): Set[InlineRequest] = requestsByMethod(methodNode) diff elided /** * Break cycles in the inline request graph by removing callsites. @@ -254,7 +211,7 @@ class Inliner[BT <: BTypes](val btypes: BT) { * The list `requests` is traversed left-to-right, removing those callsites that are part of a * cycle. Elided callsites are also removed from the `inlineRequestsForMethod` map. */ - def breakInlineCycles(requests: List[Callsite]): List[Callsite] = { + def breakInlineCycles: List[InlineRequest] = { // is there a path of inline requests from start to goal? 
def isReachable(start: MethodNode, goal: MethodNode): Boolean = { @tailrec def reachableImpl(check: List[MethodNode], visited: Set[MethodNode]): Boolean = check match { @@ -262,7 +219,7 @@ class Inliner[BT <: BTypes](val btypes: BT) { if (x == goal) true else if (visited(x)) reachableImpl(xs, visited) else { - val callees = inlineRequestsForMethod(x).map(_.callee.get.callee) + val callees = nonElidedRequests(x).map(_.callsite.callee.get.callee) reachableImpl(xs ::: callees.toList, visited + x) } @@ -272,12 +229,14 @@ class Inliner[BT <: BTypes](val btypes: BT) { reachableImpl(List(start), Set.empty) } - val result = new mutable.ListBuffer[Callsite]() + val result = new mutable.ListBuffer[InlineRequest]() + val requests = requestsByMethod.valuesIterator.flatten.toArray // sort the inline requests to ensure that removing requests is deterministic - for (r <- requests.sorted(callsiteOrdering)) { + java.util.Arrays.sort(requests, callsiteOrdering) + for (r <- requests) { // is there a chain of inlining requests that would inline the callsite method into the callee? - if (isReachable(r.callee.get.callee, r.callsiteMethod)) - inlineRequestsForMethod(r.callsiteMethod) -= r + if (isReachable(r.callsite.callee.get.callee, r.callsite.callsiteMethod)) + elided += r else result += r } @@ -286,11 +245,11 @@ class Inliner[BT <: BTypes](val btypes: BT) { // sort the remaining inline requests such that the leaves appear first, then those requests // that become leaves, etc. 
- def leavesFirst(requests: List[Callsite], visited: Set[Callsite] = Set.empty): List[Callsite] = { + def leavesFirst(requests: List[InlineRequest], visited: Set[InlineRequest] = Set.empty): List[InlineRequest] = { if (requests.isEmpty) Nil else { val (leaves, others) = requests.partition(r => { - val inlineRequestsForCallee = inlineRequestsForMethod(r.callee.get.callee) + val inlineRequestsForCallee = nonElidedRequests(r.callsite.callee.get.callee) inlineRequestsForCallee.forall(visited) }) assert(leaves.nonEmpty, requests) @@ -298,192 +257,232 @@ class Inliner[BT <: BTypes](val btypes: BT) { } } - leavesFirst(breakInlineCycles(requests)) + leavesFirst(breakInlineCycles) } + /** + * Inline the callsites of an inlining request and its post-inlining requests. + * + * @return An inliner warning for each callsite that could not be inlined. + */ + def inline(request: InlineRequest): List[CannotInlineWarning] = canInline(request.callsite) match { + case Some(warning) => List(warning) + case None => + val instructionsMap = inlineCallsite(request.callsite) + val postRequests = request.post.flatMap(post => { + // the post-request invocation instruction might not exist anymore: it might have been + // inlined itself, or eliminated by DCE. + for { + inlinedInvocationInstr <- instructionsMap.get(post.callsiteInstruction).map(_.asInstanceOf[MethodInsnNode]) + inlinedCallsite <- callGraph.callsites(request.callsite.callsiteMethod).get(inlinedInvocationInstr) + } yield InlineRequest(inlinedCallsite, post.post) + }) + postRequests flatMap inline + } /** * Copy and adapt the instructions of a method to a callsite. 
* * Preconditions: + * - The callsite can safely be inlined (canInline is true) * - The maxLocals and maxStack values of the callsite method are correctly computed - * - The callsite method contains no unreachable basic blocks, i.e., running an [[Analyzer]] - * does not produce any `null` frames + * - The callsite method contains no unreachable basic blocks, i.e., running an Analyzer does + * not produce any `null` frames * - * @param callsiteInstruction The invocation instruction - * @param callsiteStackHeight The stack height at the callsite - * @param callsiteMethod The method in which the invocation occurs - * @param callsiteClass The class in which the callsite method is defined - * @param callee The invoked method - * @param calleeDeclarationClass The class in which the invoked method is defined - * @param receiverKnownNotNull `true` if the receiver is known to be non-null - * @param keepLineNumbers `true` if LineNumberNodes should be copied to the call site - * @return `Some(message)` if inlining cannot be performed, `None` otherwise + * @return A map associating instruction nodes of the callee with the corresponding cloned + * instruction in the callsite method. 
*/ - def inline(callsiteInstruction: MethodInsnNode, callsiteStackHeight: Int, callsiteMethod: MethodNode, callsiteClass: ClassBType, - callee: MethodNode, calleeDeclarationClass: ClassBType, - receiverKnownNotNull: Boolean, keepLineNumbers: Boolean): Option[CannotInlineWarning] = { - canInline(callsiteInstruction, callsiteStackHeight, callsiteMethod, callsiteClass, callee, calleeDeclarationClass) orElse { - // New labels for the cloned instructions - val labelsMap = cloneLabels(callee) - val (clonedInstructions, instructionMap) = cloneInstructions(callee, labelsMap) - if (!keepLineNumbers) { - removeLineNumberNodes(clonedInstructions) - } + def inlineCallsite(callsite: Callsite): Map[AbstractInsnNode, AbstractInsnNode] = { + import callsite.{callsiteClass, callsiteMethod, callsiteInstruction, receiverKnownNotNull, callsiteStackHeight} + val Right(callsiteCallee) = callsite.callee + import callsiteCallee.{callee, calleeDeclarationClass} + + // New labels for the cloned instructions + val labelsMap = cloneLabels(callee) + val (clonedInstructions, instructionMap) = cloneInstructions(callee, labelsMap) + val keepLineNumbers = callsiteClass == calleeDeclarationClass + if (!keepLineNumbers) { + removeLineNumberNodes(clonedInstructions) + } - // local vars in the callee are shifted by the number of locals at the callsite - val localVarShift = callsiteMethod.maxLocals - clonedInstructions.iterator.asScala foreach { - case varInstruction: VarInsnNode => varInstruction.`var` += localVarShift - case iinc: IincInsnNode => iinc.`var` += localVarShift - case _ => () - } + // local vars in the callee are shifted by the number of locals at the callsite + val localVarShift = callsiteMethod.maxLocals + clonedInstructions.iterator.asScala foreach { + case varInstruction: VarInsnNode => varInstruction.`var` += localVarShift + case iinc: IincInsnNode => iinc.`var` += localVarShift + case _ => () + } - // add a STORE instruction for each expected argument, including for THIS instance 
if any - val argStores = new InsnList - var nextLocalIndex = callsiteMethod.maxLocals - if (!isStaticMethod(callee)) { - if (!receiverKnownNotNull) { - argStores.add(new InsnNode(DUP)) - val nonNullLabel = newLabelNode - argStores.add(new JumpInsnNode(IFNONNULL, nonNullLabel)) - argStores.add(new InsnNode(ACONST_NULL)) - argStores.add(new InsnNode(ATHROW)) - argStores.add(nonNullLabel) - } - argStores.add(new VarInsnNode(ASTORE, nextLocalIndex)) - nextLocalIndex += 1 + // add a STORE instruction for each expected argument, including for THIS instance if any + val argStores = new InsnList + var nextLocalIndex = callsiteMethod.maxLocals + if (!isStaticMethod(callee)) { + if (!receiverKnownNotNull) { + argStores.add(new InsnNode(DUP)) + val nonNullLabel = newLabelNode + argStores.add(new JumpInsnNode(IFNONNULL, nonNullLabel)) + argStores.add(new InsnNode(ACONST_NULL)) + argStores.add(new InsnNode(ATHROW)) + argStores.add(nonNullLabel) } + argStores.add(new VarInsnNode(ASTORE, nextLocalIndex)) + nextLocalIndex += 1 + } - // We just use an asm.Type here, no need to create the MethodBType. - val calleAsmType = asm.Type.getMethodType(callee.desc) + // We just use an asm.Type here, no need to create the MethodBType. 
+ val calleAsmType = asm.Type.getMethodType(callee.desc) + val calleeParamTypes = calleAsmType.getArgumentTypes - for(argTp <- calleAsmType.getArgumentTypes) { - val opc = argTp.getOpcode(ISTORE) // returns the correct xSTORE instruction for argTp - argStores.insert(new VarInsnNode(opc, nextLocalIndex)) // "insert" is "prepend" - the last argument is on the top of the stack - nextLocalIndex += argTp.getSize - } + for(argTp <- calleeParamTypes) { + val opc = argTp.getOpcode(ISTORE) // returns the correct xSTORE instruction for argTp + argStores.insert(new VarInsnNode(opc, nextLocalIndex)) // "insert" is "prepend" - the last argument is on the top of the stack + nextLocalIndex += argTp.getSize + } - clonedInstructions.insert(argStores) - - // label for the exit of the inlined functions. xRETURNs are replaced by GOTOs to this label. - val postCallLabel = newLabelNode - clonedInstructions.add(postCallLabel) - - // replace xRETURNs: - // - store the return value (if any) - // - clear the stack of the inlined method (insert DROPs) - // - load the return value - // - GOTO postCallLabel - - val returnType = calleAsmType.getReturnType - val hasReturnValue = returnType.getSort != asm.Type.VOID - val returnValueIndex = callsiteMethod.maxLocals + callee.maxLocals - nextLocalIndex += returnType.getSize - - def returnValueStore(returnInstruction: AbstractInsnNode) = { - val opc = returnInstruction.getOpcode match { - case IRETURN => ISTORE - case LRETURN => LSTORE - case FRETURN => FSTORE - case DRETURN => DSTORE - case ARETURN => ASTORE - } - new VarInsnNode(opc, returnValueIndex) + clonedInstructions.insert(argStores) + + // label for the exit of the inlined functions. xRETURNs are replaced by GOTOs to this label. 
+ val postCallLabel = newLabelNode + clonedInstructions.add(postCallLabel) + + // replace xRETURNs: + // - store the return value (if any) + // - clear the stack of the inlined method (insert DROPs) + // - load the return value + // - GOTO postCallLabel + + val returnType = calleAsmType.getReturnType + val hasReturnValue = returnType.getSort != asm.Type.VOID + val returnValueIndex = callsiteMethod.maxLocals + callee.maxLocals + nextLocalIndex += returnType.getSize + + def returnValueStore(returnInstruction: AbstractInsnNode) = { + val opc = returnInstruction.getOpcode match { + case IRETURN => ISTORE + case LRETURN => LSTORE + case FRETURN => FSTORE + case DRETURN => DSTORE + case ARETURN => ASTORE } + new VarInsnNode(opc, returnValueIndex) + } - // We run an interpreter to know the stack height at each xRETURN instruction and the sizes - // of the values on the stack. - val analyzer = new AsmAnalyzer(callee, calleeDeclarationClass.internalName) + // We run an interpreter to know the stack height at each xRETURN instruction and the sizes + // of the values on the stack. + // We don't need to worry about the method being too large for running an analysis. Callsites of + // large methods are not added to the call graph. 
+ val analyzer = new AsmAnalyzer(callee, calleeDeclarationClass.internalName) - for (originalReturn <- callee.instructions.iterator().asScala if isReturn(originalReturn)) { - val frame = analyzer.frameAt(originalReturn) - var stackHeight = frame.getStackSize + for (originalReturn <- callee.instructions.iterator().asScala if isReturn(originalReturn)) { + val frame = analyzer.frameAt(originalReturn) + var stackHeight = frame.getStackSize - val inlinedReturn = instructionMap(originalReturn) - val returnReplacement = new InsnList + val inlinedReturn = instructionMap(originalReturn) + val returnReplacement = new InsnList - def drop(slot: Int) = returnReplacement add getPop(frame.peekStack(slot).getSize) + def drop(slot: Int) = returnReplacement add getPop(frame.peekStack(slot).getSize) - // for non-void methods, store the stack top into the return local variable - if (hasReturnValue) { - returnReplacement add returnValueStore(originalReturn) - stackHeight -= 1 - } + // for non-void methods, store the stack top into the return local variable + if (hasReturnValue) { + returnReplacement add returnValueStore(originalReturn) + stackHeight -= 1 + } - // drop the rest of the stack - for (i <- 0 until stackHeight) drop(i) + // drop the rest of the stack + for (i <- 0 until stackHeight) drop(i) - returnReplacement add new JumpInsnNode(GOTO, postCallLabel) - clonedInstructions.insert(inlinedReturn, returnReplacement) - clonedInstructions.remove(inlinedReturn) - } + returnReplacement add new JumpInsnNode(GOTO, postCallLabel) + clonedInstructions.insert(inlinedReturn, returnReplacement) + clonedInstructions.remove(inlinedReturn) + } - // Load instruction for the return value - if (hasReturnValue) { - val retVarLoad = { - val opc = returnType.getOpcode(ILOAD) - new VarInsnNode(opc, returnValueIndex) - } - clonedInstructions.insert(postCallLabel, retVarLoad) + // Load instruction for the return value + if (hasReturnValue) { + val retVarLoad = { + val opc = returnType.getOpcode(ILOAD) 
+ new VarInsnNode(opc, returnValueIndex) } + clonedInstructions.insert(postCallLabel, retVarLoad) + } - callsiteMethod.instructions.insert(callsiteInstruction, clonedInstructions) - callsiteMethod.instructions.remove(callsiteInstruction) - - callsiteMethod.localVariables.addAll(cloneLocalVariableNodes(callee, labelsMap, callee.name + "_").asJava) - callsiteMethod.tryCatchBlocks.addAll(cloneTryCatchBlockNodes(callee, labelsMap).asJava) - - // Add all invocation instructions and closure instantiations that were inlined to the call graph - callee.instructions.iterator().asScala foreach { - case originalCallsiteIns: MethodInsnNode => - callGraph.callsites.get(originalCallsiteIns) match { - case Some(originalCallsite) => - val newCallsiteIns = instructionMap(originalCallsiteIns).asInstanceOf[MethodInsnNode] - callGraph.callsites(newCallsiteIns) = Callsite( - callsiteInstruction = newCallsiteIns, - callsiteMethod = callsiteMethod, - callsiteClass = callsiteClass, - callee = originalCallsite.callee, - argInfos = Nil, // TODO: re-compute argInfos for new destination (once we actually compute them) - callsiteStackHeight = callsiteStackHeight + originalCallsite.callsiteStackHeight, - receiverKnownNotNull = originalCallsite.receiverKnownNotNull, - callsitePosition = originalCallsite.callsitePosition - ) - - case None => - } + callsiteMethod.instructions.insert(callsiteInstruction, clonedInstructions) + callsiteMethod.instructions.remove(callsiteInstruction) + + callsiteMethod.localVariables.addAll(cloneLocalVariableNodes(callee, labelsMap, callee.name + "_", localVarShift).asJava) + // prepend the handlers of the callee. the order of handlers matters: when an exception is thrown + // at some instruction, the first handler guarding that instruction and having a matching exception + // type is executed. prepending the callee's handlers makes sure to test those handlers first if + // an exception is thrown in the inlined code. 
+ callsiteMethod.tryCatchBlocks.addAll(0, cloneTryCatchBlockNodes(callee, labelsMap).asJava) + + callsiteMethod.maxLocals += returnType.getSize + callee.maxLocals + val maxStackOfInlinedCode = { + // One slot per value is correct for long / double, see comment in the `analysis` package object. + val numStoredArgs = calleeParamTypes.length + (if (isStaticMethod(callee)) 0 else 1) + callee.maxStack + callsiteStackHeight - numStoredArgs + } + val stackHeightAtNullCheck = { + // When adding a null check for the receiver, a DUP is inserted, which might cause a new maxStack. + // If the callsite has other argument values than the receiver on the stack, these are pop'ed + // and stored into locals before the null check, so in that case the maxStack doesn't grow. + val stackSlotForNullCheck = if (!isStaticMethod(callee) && !receiverKnownNotNull && calleeParamTypes.isEmpty) 1 else 0 + callsiteStackHeight + stackSlotForNullCheck + } - case indy: InvokeDynamicInsnNode => - callGraph.closureInstantiations.get(indy) match { - case Some(closureInit) => - val newIndy = instructionMap(indy).asInstanceOf[InvokeDynamicInsnNode] - callGraph.closureInstantiations(newIndy) = ClosureInstantiation(closureInit.lambdaMetaFactoryCall.copy(indy = newIndy), callsiteMethod, callsiteClass) + callsiteMethod.maxStack = math.max(callsiteMethod.maxStack, math.max(stackHeightAtNullCheck, maxStackOfInlinedCode)) - case None => - } + callGraph.addIfMissing(callee, calleeDeclarationClass) - case _ => - } - // Remove the elided invocation from the call graph - callGraph.callsites.remove(callsiteInstruction) - - // Inlining a method body can render some code unreachable, see example above (in runInliner). 
- unreachableCodeEliminated -= callsiteMethod + def mapArgInfo(argInfo: (Int, ArgInfo)): Option[(Int, ArgInfo)] = argInfo match { + case lit @ (_, FunctionLiteral) => Some(lit) + case (argIndex, ForwardedParam(paramIndex)) => callsite.argInfos.get(paramIndex).map((argIndex, _)) + } - callsiteMethod.maxLocals += returnType.getSize + callee.maxLocals - callsiteMethod.maxStack = math.max(callsiteMethod.maxStack, callee.maxStack + callsiteStackHeight) + // Add all invocation instructions and closure instantiations that were inlined to the call graph + callGraph.callsites(callee).valuesIterator foreach { originalCallsite => + val newCallsiteIns = instructionMap(originalCallsite.callsiteInstruction).asInstanceOf[MethodInsnNode] + val argInfos = originalCallsite.argInfos flatMap mapArgInfo + callGraph.addCallsite(Callsite( + callsiteInstruction = newCallsiteIns, + callsiteMethod = callsiteMethod, + callsiteClass = callsiteClass, + callee = originalCallsite.callee, + argInfos = argInfos, + callsiteStackHeight = callsiteStackHeight + originalCallsite.callsiteStackHeight, + receiverKnownNotNull = originalCallsite.receiverKnownNotNull, + callsitePosition = originalCallsite.callsitePosition + )) + } - None + callGraph.closureInstantiations(callee).valuesIterator foreach { originalClosureInit => + val newIndy = instructionMap(originalClosureInit.lambdaMetaFactoryCall.indy).asInstanceOf[InvokeDynamicInsnNode] + val capturedArgInfos = originalClosureInit.capturedArgInfos flatMap mapArgInfo + callGraph.addClosureInstantiation( + ClosureInstantiation( + originalClosureInit.lambdaMetaFactoryCall.copy(indy = newIndy), + callsiteMethod, + callsiteClass, + capturedArgInfos) + ) } + + // Remove the elided invocation from the call graph + callGraph.removeCallsite(callsiteInstruction, callsiteMethod) + + // Inlining a method body can render some code unreachable, see example above (in runInliner). 
+ unreachableCodeEliminated -= callsiteMethod + + instructionMap } /** - * Check whether an inling can be performed. Parmeters are described in method [[inline]]. + * Check whether an inling can be performed. * @return `Some(message)` if inlining cannot be performed, `None` otherwise */ - def canInline(callsiteInstruction: MethodInsnNode, callsiteStackHeight: Int, callsiteMethod: MethodNode, callsiteClass: ClassBType, - callee: MethodNode, calleeDeclarationClass: ClassBType): Option[CannotInlineWarning] = { + def canInline(callsite: Callsite): Option[CannotInlineWarning] = { + import callsite.{callsiteInstruction, callsiteMethod, callsiteClass, callsiteStackHeight} + val Right(callsiteCallee) = callsite.callee + import callsiteCallee.{callee, calleeDeclarationClass} def calleeDesc = s"${callee.name} of type ${callee.desc} in ${calleeDeclarationClass.internalName}" def methodMismatch = s"Wrong method node for inlining ${textify(callsiteInstruction)}: $calleeDesc" diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala new file mode 100644 index 0000000000..e559b63c09 --- /dev/null +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala @@ -0,0 +1,230 @@ +/* NSC -- new Scala compiler + * Copyright 2005-2014 LAMP/EPFL + * @author Martin Odersky + */ + +package scala.tools.nsc +package backend.jvm +package opt + +import scala.collection.immutable.IntMap +import scala.tools.asm.Type +import scala.tools.asm.tree.{MethodNode, MethodInsnNode} +import scala.tools.nsc.backend.jvm.BTypes.InternalName +import scala.collection.convert.decorateAsScala._ + +class InlinerHeuristics[BT <: BTypes](val bTypes: BT) { + import bTypes._ + import inliner._ + import callGraph._ + + case class InlineRequest(callsite: Callsite, post: List[PostInlineRequest]) + case class PostInlineRequest(callsiteInstruction: MethodInsnNode, post: List[PostInlineRequest]) + + /** + * Select 
callsites from the call graph that should be inlined, grouped by the containing method. + * Cyclic inlining requests are allowed, the inliner will eliminate requests to break cycles. + */ + def selectCallsitesForInlining: Map[MethodNode, Set[InlineRequest]] = { + // We should only create inlining requests for callsites being compiled (not for callsites in + // classes on the classpath). The call graph may contain callsites of classes parsed from the + // classpath. In order to get only the callsites being compiled, we start at the map of + // compilingClasses in the byteCodeRepository. + val compilingMethods = for { + classNode <- byteCodeRepository.compilingClasses.valuesIterator + methodNode <- classNode.methods.iterator.asScala + } yield methodNode + + compilingMethods.map(methodNode => { + var requests = Set.empty[InlineRequest] + callGraph.callsites(methodNode).valuesIterator foreach { + case callsite @ Callsite(_, _, _, Right(Callee(callee, calleeDeclClass, safeToInline, _, annotatedInline, _, _, warning)), _, _, _, pos) => + val request = inlineRequest(callsite) + requests ++= request + if (request.isEmpty) { + if (annotatedInline && bTypes.compilerSettings.YoptWarningEmitAtInlineFailed) { + // if the callsite is annotated @inline, we report an inline warning even if the underlying + // reason is, for example, mixed compilation (which has a separate -Yopt-warning flag). + def initMsg = s"${BackendReporting.methodSignature(calleeDeclClass.internalName, callee)} is annotated @inline but cannot be inlined" + def warnMsg = warning.map(" Possible reason:\n" + _).getOrElse("") + if (doRewriteTraitCallsite(callsite)) + backendReporting.inlinerWarning(pos, s"$initMsg: the trait method call could not be rewritten to the static implementation method." + warnMsg) + else if (!safeToInline) + backendReporting.inlinerWarning(pos, s"$initMsg: the method is not final and may be overridden." + warnMsg) + else + backendReporting.inlinerWarning(pos, s"$initMsg." 
+ warnMsg) + } else if (warning.isDefined && warning.get.emitWarning(compilerSettings)) { + // when annotatedInline is false, and there is some warning, the callsite metadata is possibly incomplete. + backendReporting.inlinerWarning(pos, s"there was a problem determining if method ${callee.name} can be inlined: \n"+ warning.get) + } + } + + case Callsite(ins, _, _, Left(warning), _, _, _, pos) => + if (warning.emitWarning(compilerSettings)) + backendReporting.inlinerWarning(pos, s"failed to determine if ${ins.name} should be inlined:\n$warning") + } + (methodNode, requests) + }).filterNot(_._2.isEmpty).toMap + } + + /** + * Returns the inline request for a callsite if the callsite should be inlined according to the + * current heuristics (`-Yopt-inline-heuristics`). + * + * The resulting inline request may contain post-inlining requests of callsites that in turn are + * also selected as individual inlining requests. + */ + def inlineRequest(callsite: Callsite): Option[InlineRequest] = compilerSettings.YoptInlineHeuristics.value match { + case "everything" => + if (callsite.callee.get.safeToInline) Some(InlineRequest(callsite, Nil)) + else None + + case "at-inline-annotated" => + val callee = callsite.callee.get + if (callee.safeToInline && callee.annotatedInline) Some(InlineRequest(callsite, Nil)) + else None + + case "default" => + val callee = callsite.callee.get + if (callee.safeToInline && !callee.annotatedNoInline) { + val shouldInlineHO = callee.samParamTypes.nonEmpty && (callee.samParamTypes exists { + case (index, _) => callsite.argInfos.contains(index) + }) + + if (shouldInlineHO || callee.annotatedInline) Some(InlineRequest(callsite, Nil)) + else None + } else None + } + + /* + // using http://lihaoyi.github.io/Ammonite/ + + load.ivy("com.google.guava" % "guava" % "18.0") + val javaUtilFunctionClasses = { + val rt = System.getProperty("sun.boot.class.path").split(":").find(_.endsWith("lib/rt.jar")).get + val u = new java.io.File(rt).toURL + val l = new 
java.net.URLClassLoader(Array(u)) + val cp = com.google.common.reflect.ClassPath.from(l) + cp.getTopLevelClasses("java.util.function").toArray.map(_.toString).toList + } + + // found using IntelliJ's "Find Usages" on the @FunctionalInterface annotation + val otherClasses = List( + "com.sun.javafx.css.parser.Recognizer", + "java.awt.KeyEventDispatcher", + "java.awt.KeyEventPostProcessor", + "java.io.FileFilter", + "java.io.FilenameFilter", + "java.lang.Runnable", + "java.lang.Thread$UncaughtExceptionHandler", + "java.nio.file.DirectoryStream$Filter", + "java.nio.file.PathMatcher", + "java.time.temporal.TemporalAdjuster", + "java.time.temporal.TemporalQuery", + "java.util.Comparator", + "java.util.concurrent.Callable", + "java.util.logging.Filter", + "java.util.prefs.PreferenceChangeListener", + "javafx.animation.Interpolatable", + "javafx.beans.InvalidationListener", + "javafx.beans.value.ChangeListener", + "javafx.collections.ListChangeListener", + "javafx.collections.MapChangeListener", + "javafx.collections.SetChangeListener", + "javafx.event.EventHandler", + "javafx.util.Builder", + "javafx.util.BuilderFactory", + "javafx.util.Callback" + ) + + val allClasses = javaUtilFunctionClasses ::: otherClasses + + load.ivy("org.ow2.asm" % "asm" % "5.0.4") + val classesAndSamNameDesc = allClasses.map(c => { + val cls = Class.forName(c) + val internalName = org.objectweb.asm.Type.getDescriptor(cls).drop(1).dropRight(1) // drop L and ; + val sams = cls.getMethods.filter(m => { + (m.getModifiers & java.lang.reflect.Modifier.ABSTRACT) != 0 && + m.getName != "equals" // Comparator has an abstract override of "equals" for adding Javadoc + }) + assert(sams.size == 1, internalName + sams.map(_.getName)) + val sam = sams.head + val samDesc = org.objectweb.asm.Type.getMethodDescriptor(sam) + (internalName, sam.getName, samDesc) + }) + println(classesAndSamNameDesc map { + case (cls, nme, desc) => s"""("$cls", "$nme$desc")""" + } mkString ("", ",\n", "\n")) + */ + private val 
javaSams: Map[String, String] = Map( + ("java/util/function/BiConsumer", "accept(Ljava/lang/Object;Ljava/lang/Object;)V"), + ("java/util/function/BiFunction", "apply(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;"), + ("java/util/function/BiPredicate", "test(Ljava/lang/Object;Ljava/lang/Object;)Z"), + ("java/util/function/BinaryOperator", "apply(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;"), + ("java/util/function/BooleanSupplier", "getAsBoolean()Z"), + ("java/util/function/Consumer", "accept(Ljava/lang/Object;)V"), + ("java/util/function/DoubleBinaryOperator", "applyAsDouble(DD)D"), + ("java/util/function/DoubleConsumer", "accept(D)V"), + ("java/util/function/DoubleFunction", "apply(D)Ljava/lang/Object;"), + ("java/util/function/DoublePredicate", "test(D)Z"), + ("java/util/function/DoubleSupplier", "getAsDouble()D"), + ("java/util/function/DoubleToIntFunction", "applyAsInt(D)I"), + ("java/util/function/DoubleToLongFunction", "applyAsLong(D)J"), + ("java/util/function/DoubleUnaryOperator", "applyAsDouble(D)D"), + ("java/util/function/Function", "apply(Ljava/lang/Object;)Ljava/lang/Object;"), + ("java/util/function/IntBinaryOperator", "applyAsInt(II)I"), + ("java/util/function/IntConsumer", "accept(I)V"), + ("java/util/function/IntFunction", "apply(I)Ljava/lang/Object;"), + ("java/util/function/IntPredicate", "test(I)Z"), + ("java/util/function/IntSupplier", "getAsInt()I"), + ("java/util/function/IntToDoubleFunction", "applyAsDouble(I)D"), + ("java/util/function/IntToLongFunction", "applyAsLong(I)J"), + ("java/util/function/IntUnaryOperator", "applyAsInt(I)I"), + ("java/util/function/LongBinaryOperator", "applyAsLong(JJ)J"), + ("java/util/function/LongConsumer", "accept(J)V"), + ("java/util/function/LongFunction", "apply(J)Ljava/lang/Object;"), + ("java/util/function/LongPredicate", "test(J)Z"), + ("java/util/function/LongSupplier", "getAsLong()J"), + ("java/util/function/LongToDoubleFunction", "applyAsDouble(J)D"), + 
("java/util/function/LongToIntFunction", "applyAsInt(J)I"), + ("java/util/function/LongUnaryOperator", "applyAsLong(J)J"), + ("java/util/function/ObjDoubleConsumer", "accept(Ljava/lang/Object;D)V"), + ("java/util/function/ObjIntConsumer", "accept(Ljava/lang/Object;I)V"), + ("java/util/function/ObjLongConsumer", "accept(Ljava/lang/Object;J)V"), + ("java/util/function/Predicate", "test(Ljava/lang/Object;)Z"), + ("java/util/function/Supplier", "get()Ljava/lang/Object;"), + ("java/util/function/ToDoubleBiFunction", "applyAsDouble(Ljava/lang/Object;Ljava/lang/Object;)D"), + ("java/util/function/ToDoubleFunction", "applyAsDouble(Ljava/lang/Object;)D"), + ("java/util/function/ToIntBiFunction", "applyAsInt(Ljava/lang/Object;Ljava/lang/Object;)I"), + ("java/util/function/ToIntFunction", "applyAsInt(Ljava/lang/Object;)I"), + ("java/util/function/ToLongBiFunction", "applyAsLong(Ljava/lang/Object;Ljava/lang/Object;)J"), + ("java/util/function/ToLongFunction", "applyAsLong(Ljava/lang/Object;)J"), + ("java/util/function/UnaryOperator", "apply(Ljava/lang/Object;)Ljava/lang/Object;"), + ("com/sun/javafx/css/parser/Recognizer", "recognize(I)Z"), + ("java/awt/KeyEventDispatcher", "dispatchKeyEvent(Ljava/awt/event/KeyEvent;)Z"), + ("java/awt/KeyEventPostProcessor", "postProcessKeyEvent(Ljava/awt/event/KeyEvent;)Z"), + ("java/io/FileFilter", "accept(Ljava/io/File;)Z"), + ("java/io/FilenameFilter", "accept(Ljava/io/File;Ljava/lang/String;)Z"), + ("java/lang/Runnable", "run()V"), + ("java/lang/Thread$UncaughtExceptionHandler", "uncaughtException(Ljava/lang/Thread;Ljava/lang/Throwable;)V"), + ("java/nio/file/DirectoryStream$Filter", "accept(Ljava/lang/Object;)Z"), + ("java/nio/file/PathMatcher", "matches(Ljava/nio/file/Path;)Z"), + ("java/time/temporal/TemporalAdjuster", "adjustInto(Ljava/time/temporal/Temporal;)Ljava/time/temporal/Temporal;"), + ("java/time/temporal/TemporalQuery", "queryFrom(Ljava/time/temporal/TemporalAccessor;)Ljava/lang/Object;"), + ("java/util/Comparator", 
"compare(Ljava/lang/Object;Ljava/lang/Object;)I"), + ("java/util/concurrent/Callable", "call()Ljava/lang/Object;"), + ("java/util/logging/Filter", "isLoggable(Ljava/util/logging/LogRecord;)Z"), + ("java/util/prefs/PreferenceChangeListener", "preferenceChange(Ljava/util/prefs/PreferenceChangeEvent;)V"), + ("javafx/animation/Interpolatable", "interpolate(Ljava/lang/Object;D)Ljava/lang/Object;"), + ("javafx/beans/InvalidationListener", "invalidated(Ljavafx/beans/Observable;)V"), + ("javafx/beans/value/ChangeListener", "changed(Ljavafx/beans/value/ObservableValue;Ljava/lang/Object;Ljava/lang/Object;)V"), + ("javafx/collections/ListChangeListener", "onChanged(Ljavafx/collections/ListChangeListener$Change;)V"), + ("javafx/collections/MapChangeListener", "onChanged(Ljavafx/collections/MapChangeListener$Change;)V"), + ("javafx/collections/SetChangeListener", "onChanged(Ljavafx/collections/SetChangeListener$Change;)V"), + ("javafx/event/EventHandler", "handle(Ljavafx/event/Event;)V"), + ("javafx/util/Builder", "build()Ljava/lang/Object;"), + ("javafx/util/BuilderFactory", "getBuilder(Ljava/lang/Class;)Ljavafx/util/Builder;"), + ("javafx/util/Callback", "call(Ljava/lang/Object;)Ljava/lang/Object;") + ) + def javaSam(internalName: InternalName): Option[String] = javaSams.get(internalName) +} diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala index 4132710a96..1e7b46012e 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala @@ -8,8 +8,7 @@ package backend.jvm package opt import scala.annotation.switch -import scala.tools.asm.Opcodes -import scala.tools.asm.tree.analysis.{Analyzer, BasicInterpreter} +import scala.tools.asm.{Type, ClassWriter, MethodWriter, Opcodes} import scala.tools.asm.tree._ import scala.collection.convert.decorateAsScala._ import scala.tools.nsc.backend.jvm.BTypes.InternalName @@ -49,19 
+48,53 @@ import scala.tools.nsc.backend.jvm.opt.BytecodeUtils._ class LocalOpt[BT <: BTypes](val btypes: BT) { import LocalOptImpls._ import btypes._ + import analyzers._ + + /** + * In order to run an Analyzer, the maxLocals / maxStack fields need to be available. The ASM + * framework only computes these values during bytecode generation. + * + * Since there's currently no better way, we run a bytecode generator on the method and extract + * the computed values. This required changes to the ASM codebase: + * - the [[MethodWriter]] class was made public + * - accessors for maxLocals / maxStack were added to the MethodWriter class + * + * We could probably make this faster (and allocate less memory) by hacking the ASM framework + * more: create a subclass of MethodWriter with a /dev/null byteVector. Another option would be + * to create a separate visitor for computing those values, duplicating the functionality from the + * MethodWriter. + * + * NOTE: the maxStack value computed by this method allocates two slots for long / double values, + * as required by the JVM spec. For running an Analyzer, one slot per long / double would be fine. + * See comment in `analysis` package object. + */ + def computeMaxLocalsMaxStack(method: MethodNode): Unit = { + if (!maxLocalsMaxStackComputed(method)) { + method.maxLocals = 0 + method.maxStack = 0 + val cw = new ClassWriter(ClassWriter.COMPUTE_MAXS) + val excs = method.exceptions.asScala.toArray + val mw = cw.visitMethod(method.access, method.name, method.desc, method.signature, excs).asInstanceOf[MethodWriter] + method.accept(mw) + method.maxLocals = mw.getMaxLocals + method.maxStack = mw.getMaxStack + maxLocalsMaxStackComputed += method + } + } /** * Remove unreachable code from a method. * * This implementation only removes instructions that are unreachable for an ASM analyzer / * interpreter. This ensures that future analyses will not produce `null` frames. The inliner - * and call graph builder depend on this property. 
+ * depends on this property. * * @return A set containing the eliminated instructions */ def minimalRemoveUnreachableCode(method: MethodNode, ownerClassName: InternalName): Set[AbstractInsnNode] = { if (method.instructions.size == 0) return Set.empty // fast path for abstract methods if (unreachableCodeEliminated(method)) return Set.empty // we know there is no unreachable code + if (!AsmAnalyzer.sizeOKForBasicValue(method)) return Set.empty // the method is too large for running an analyzer // For correctness, after removing unreachable code, we have to eliminate empty exception // handlers, see scaladoc of def methodOptimizations. Removing an live handler may render more @@ -137,9 +170,10 @@ class LocalOpt[BT <: BTypes](val btypes: BT) { // This triggers "ClassFormatError: Illegal exception table range in class file C". Similar // for local variables in dead blocks. Maybe that's a bug in the ASM framework. + def canRunDCE = AsmAnalyzer.sizeOKForBasicValue(method) def removalRound(): Boolean = { // unreachable-code, empty-handlers and simplify-jumps run until reaching a fixpoint (see doc on class LocalOpt) - val (codeRemoved, handlersRemoved, liveHandlerRemoved) = if (compilerSettings.YoptUnreachableCode) { + val (codeRemoved, handlersRemoved, liveHandlerRemoved) = if (compilerSettings.YoptUnreachableCode && canRunDCE) { val (removedInstructions, liveLabels) = removeUnreachableCodeImpl(method, ownerClassName) val removedHandlers = removeEmptyExceptionHandlers(method) (removedInstructions.nonEmpty, removedHandlers.nonEmpty, removedHandlers.exists(h => liveLabels(h.start))) @@ -179,47 +213,55 @@ class LocalOpt[BT <: BTypes](val btypes: BT) { codeHandlersOrJumpsChanged || localsRemoved || lineNumbersRemoved || labelsRemoved } -} - -object LocalOptImpls { /** * Removes unreachable basic blocks. * - * TODO: rewrite, don't use computeMaxLocalsMaxStack (runs a ClassWriter) / Analyzer. Too slow. 
- * * @return A set containing eliminated instructions, and a set containing all live label nodes. */ def removeUnreachableCodeImpl(method: MethodNode, ownerClassName: InternalName): (Set[AbstractInsnNode], Set[LabelNode]) = { - // The data flow analysis requires the maxLocals / maxStack fields of the method to be computed. - computeMaxLocalsMaxStack(method) - val a = new Analyzer(new BasicInterpreter) - a.analyze(ownerClassName, method) - val frames = a.getFrames + val a = new AsmAnalyzer(method, ownerClassName) + val frames = a.analyzer.getFrames - val initialSize = method.instructions.size var i = 0 var liveLabels = Set.empty[LabelNode] var removedInstructions = Set.empty[AbstractInsnNode] + var maxLocals = Type.getArgumentsAndReturnSizes(method.desc) >> 2 - (if (BytecodeUtils.isStaticMethod(method)) 1 else 0) + var maxStack = 0 val itr = method.instructions.iterator() while (itr.hasNext) { - itr.next() match { - case l: LabelNode => - if (frames(i) != null) liveLabels += l + val insn = itr.next() + val isLive = frames(i) != null + if (isLive) maxStack = math.max(maxStack, frames(i).getStackSize) - case ins => + insn match { + case l: LabelNode => // label nodes are not removed: they might be referenced for example in a LocalVariableNode - if (frames(i) == null || ins.getOpcode == Opcodes.NOP) { + if (isLive) liveLabels += l + + case v: VarInsnNode if isLive => + val longSize = if (isSize2LoadOrStore(v.getOpcode)) 1 else 0 + maxLocals = math.max(maxLocals, v.`var` + longSize + 1) // + 1 becauase local numbers are 0-based + + case i: IincInsnNode if isLive => + maxLocals = math.max(maxLocals, i.`var` + 1) + + case _ => + if (!isLive || insn.getOpcode == Opcodes.NOP) { // Instruction iterators allow removing during iteration. // Removing is O(1): instructions are doubly linked list elements. 
itr.remove() - removedInstructions += ins + removedInstructions += insn } } i += 1 } + method.maxLocals = maxLocals + method.maxStack = maxStack (removedInstructions, liveLabels) } +} +object LocalOptImpls { /** * Remove exception handlers that cover empty code blocks. A block is considered empty if it * consist only of labels, frames, line numbers, nops and gotos. @@ -312,10 +354,7 @@ object LocalOptImpls { // Add the index of the local variable used by `varIns` to the `renumber` array. def addVar(varIns: VarInsnNode): Unit = { val index = varIns.`var` - val isWide = (varIns.getOpcode: @switch) match { - case Opcodes.LLOAD | Opcodes.DLOAD | Opcodes.LSTORE | Opcodes.DSTORE => true - case _ => false - } + val isWide = isSize2LoadOrStore(varIns.getOpcode) // Ensure the length of `renumber`. Unused variable indices are mapped to -1. val minLength = if (isWide) index + 2 else index + 1 diff --git a/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala b/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala index 3422167d02..74d152a4cf 100644 --- a/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala +++ b/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala @@ -270,19 +270,20 @@ trait ScalaSettings extends AbsScalaSettings def YoptInlinerEnabled = YoptInlineProject || YoptInlineGlobal def YoptBuildCallGraph = YoptInlinerEnabled || YoptClosureElimination - def YoptAddToBytecodeRepository = YoptInlinerEnabled || YoptClosureElimination + def YoptAddToBytecodeRepository = YoptBuildCallGraph || YoptInlinerEnabled || YoptClosureElimination val YoptInlineHeuristics = ChoiceSetting( name = "-Yopt-inline-heuristics", helpArg = "strategy", descr = "Set the heuristics for inlining decisions.", - choices = List("at-inline-annotated", "everything"), - default = "at-inline-annotated") + choices = List("at-inline-annotated", "everything", "default"), + default = "default") object YoptWarningsChoices extends MultiChoiceEnumeration { val none = Choice("none" , "No 
optimizer warnings.") val atInlineFailedSummary = Choice("at-inline-failed-summary" , "One-line summary if there were @inline method calls that could not be inlined.") val atInlineFailed = Choice("at-inline-failed" , "A detailed warning for each @inline method call that could not be inlined.") + val anyInlineFailed = Choice("any-inline-failed" , "A detailed warning for every callsite that was chosen for inlining by the heuristics, but could not be inlined.") val noInlineMixed = Choice("no-inline-mixed" , "In mixed compilation, warn at callsites methods defined in java sources (the inlining decision cannot be made without bytecode).") val noInlineMissingBytecode = Choice("no-inline-missing-bytecode" , "Warn if an inlining decision cannot be made because a the bytecode of a class or member cannot be found on the compilation classpath.") val noInlineMissingScalaInlineInfoAttr = Choice("no-inline-missing-attribute", "Warn if an inlining decision cannot be made because a Scala classfile does not have a ScalaInlineInfo attribute.") @@ -301,7 +302,8 @@ trait ScalaSettings extends AbsScalaSettings def YoptWarningEmitAtInlineFailed = !YoptWarnings.isSetByUser || YoptWarnings.contains(YoptWarningsChoices.atInlineFailedSummary) || - YoptWarnings.contains(YoptWarningsChoices.atInlineFailed) + YoptWarnings.contains(YoptWarningsChoices.atInlineFailed) || + YoptWarnings.contains(YoptWarningsChoices.anyInlineFailed) def YoptWarningNoInlineMixed = YoptWarnings.contains(YoptWarningsChoices.noInlineMixed) def YoptWarningNoInlineMissingBytecode = YoptWarnings.contains(YoptWarningsChoices.noInlineMissingBytecode) diff --git a/src/library/scala/collection/immutable/List.scala b/src/library/scala/collection/immutable/List.scala index 7b1997252d..eb095dbbc2 100644 --- a/src/library/scala/collection/immutable/List.scala +++ b/src/library/scala/collection/immutable/List.scala @@ -266,7 +266,6 @@ sealed abstract class List[+A] extends AbstractSeq[A] (b.toList, these) } - @noinline // TODO - 
fix optimizer bug that requires noinline (see SI-8334) final override def map[B, That](f: A => B)(implicit bf: CanBuildFrom[List[A], B, That]): That = { if (bf eq List.ReusableCBF) { if (this eq Nil) Nil.asInstanceOf[That] else { @@ -285,7 +284,6 @@ sealed abstract class List[+A] extends AbstractSeq[A] else super.map(f) } - @noinline // TODO - fix optimizer bug that requires noinline for map; applied here to be safe (see SI-8334) final override def collect[B, That](pf: PartialFunction[A, B])(implicit bf: CanBuildFrom[List[A], B, That]): That = { if (bf eq List.ReusableCBF) { if (this eq Nil) Nil.asInstanceOf[That] else { @@ -315,7 +313,6 @@ sealed abstract class List[+A] extends AbstractSeq[A] else super.collect(pf) } - @noinline // TODO - fix optimizer bug that requires noinline for map; applied here to be safe (see SI-8334) final override def flatMap[B, That](f: A => GenTraversableOnce[B])(implicit bf: CanBuildFrom[List[A], B, That]): That = { if (bf eq List.ReusableCBF) { if (this eq Nil) Nil.asInstanceOf[That] else { diff --git a/src/reflect/scala/reflect/internal/Definitions.scala b/src/reflect/scala/reflect/internal/Definitions.scala index a3d9368915..5ce5c39145 100644 --- a/src/reflect/scala/reflect/internal/Definitions.scala +++ b/src/reflect/scala/reflect/internal/Definitions.scala @@ -797,7 +797,9 @@ trait Definitions extends api.StandardDefinitions { * The class defining the method is a supertype of `tp` that * has a public no-arg primary constructor. */ - def samOf(tp: Type): Symbol = if (!settings.Xexperimental) NoSymbol else { + def samOf(tp: Type): Symbol = if (!settings.Xexperimental) NoSymbol else findSam(tp) + + def findSam(tp: Type): Symbol = { // if tp has a constructor, it must be public and must not take any arguments // (not even an implicit argument list -- to keep it simple for now) val tpSym = tp.typeSymbol |