From 5217b00082d1a7bce2cd44666cb93dab7cb93f0a Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Fri, 14 Aug 2015 14:25:18 +0200 Subject: minor clenaups --- .../scala/tools/nsc/backend/jvm/BTypes.scala | 4 +++- .../tools/nsc/backend/jvm/BTypesFromSymbols.scala | 2 +- .../tools/nsc/backend/jvm/BackendReporting.scala | 2 +- .../scala/tools/nsc/backend/jvm/GenBCode.scala | 2 +- .../nsc/backend/jvm/opt/ByteCodeRepository.scala | 22 +++++++++++++--------- 5 files changed, 19 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/compiler/scala/tools/nsc/backend/jvm/BTypes.scala b/src/compiler/scala/tools/nsc/backend/jvm/BTypes.scala index 8720da84e8..55d181823b 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/BTypes.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/BTypes.scala @@ -38,7 +38,7 @@ abstract class BTypes { /** * Tools for parsing classfiles, used by the inliner. */ - val byteCodeRepository: ByteCodeRepository + val byteCodeRepository: ByteCodeRepository[this.type] val localOpt: LocalOpt[this.type] @@ -554,6 +554,8 @@ abstract class BTypes { * Terminology * ----------- * + * Diagram here: https://blogs.oracle.com/darcy/entry/nested_inner_member_and_top + * * - Nested class (JLS 8): class whose declaration occurs within the body of another class * * - Top-level class (JLS 8): non-nested class diff --git a/src/compiler/scala/tools/nsc/backend/jvm/BTypesFromSymbols.scala b/src/compiler/scala/tools/nsc/backend/jvm/BTypesFromSymbols.scala index 45d9cc3ff3..218cf0a416 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/BTypesFromSymbols.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/BTypesFromSymbols.scala @@ -36,7 +36,7 @@ class BTypesFromSymbols[G <: Global](val global: G) extends BTypes { val coreBTypes = new CoreBTypesProxy[this.type](this) import coreBTypes._ - val byteCodeRepository = new ByteCodeRepository(global.classPath, javaDefinedClasses, recordPerRunCache(collection.concurrent.TrieMap.empty)) + val byteCodeRepository: ByteCodeRepository[this.type] = new ByteCodeRepository(global.classPath, this) val localOpt: LocalOpt[this.type] = new LocalOpt(this) diff --git a/src/compiler/scala/tools/nsc/backend/jvm/BackendReporting.scala b/src/compiler/scala/tools/nsc/backend/jvm/BackendReporting.scala index b41d0de92f..8526f7e0ae 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/BackendReporting.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/BackendReporting.scala @@ -1,7 +1,7 @@ package scala.tools.nsc package backend.jvm -import scala.tools.asm.tree.{InvokeDynamicInsnNode, AbstractInsnNode, MethodNode} +import scala.tools.asm.tree.{AbstractInsnNode, MethodNode} import scala.tools.nsc.backend.jvm.BTypes.InternalName import scala.reflect.internal.util.Position import scala.tools.nsc.settings.ScalaSettings diff --git a/src/compiler/scala/tools/nsc/backend/jvm/GenBCode.scala b/src/compiler/scala/tools/nsc/backend/jvm/GenBCode.scala index 00b4b8b667..a19b5f6e2c 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/GenBCode.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/GenBCode.scala @@ -186,7 +186,7 @@ abstract class GenBCode extends BCodeSyncAndTry { // -------------- "plain" class -------------- val pcb = new PlainClassBuilder(cunit) pcb.genPlainClass(cd) - val outF = if (needsOutFolder) getOutFolder(claszSymbol, pcb.thisName, cunit) else null; + val outF = if (needsOutFolder) getOutFolder(claszSymbol, pcb.thisName, cunit) else null val plainC = pcb.cnode // -------------- bean info class, if needed -------------- diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/ByteCodeRepository.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/ByteCodeRepository.scala index a5b85e54e7..8fc976a1c5 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/ByteCodeRepository.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/ByteCodeRepository.scala @@ -24,15 +24,19 @@ import java.util.concurrent.atomic.AtomicLong * classpath. Parsed classes are cached in the `classes` map. * * @param classPath The compiler classpath where classfiles are searched and read from. - * @param classes Cache for parsed ClassNodes. Also stores the source of the bytecode: - * [[Classfile]] if read from `classPath`, [[CompilationUnit]] if the bytecode - * corresponds to a class being compiled. - * The `Long` field encodes the age of the node in the map, which allows removing - * old entries when the map grows too large. - * For Java classes in mixed compilation, the map contains an error message: no - * ClassNode is generated by the backend and also no classfile that could be parsed. */ -class ByteCodeRepository(val classPath: ClassFileLookup[AbstractFile], val isJavaSourceDefined: InternalName => Boolean, val classes: collection.concurrent.Map[InternalName, Either[ClassNotFound, (ClassNode, Source, Long)]]) { +class ByteCodeRepository[BT <: BTypes](val classPath: ClassFileLookup[AbstractFile], val btypes: BT) { + import btypes._ + + /** + * Cache for parsed ClassNodes. Also stores the source of the bytecode: [[Classfile]] if read + * from `classPath`, [[CompilationUnit]] if the bytecode corresponds to a class being compiled. + * The `Long` field encodes the age of the node in the map, which allows removing old entries when + * the map grows too large (see limitCacheSize). + * For Java classes in mixed compilation, the map contains an error message: no ClassNode is + * generated by the backend and also no classfile that could be parsed. + */ + val classes: collection.concurrent.Map[InternalName, Either[ClassNotFound, (ClassNode, Source, Long)]] = recordPerRunCache(collection.concurrent.TrieMap.empty) private val maxCacheSize = 1500 private val targetSize = 500 @@ -157,7 +161,7 @@ class ByteCodeRepository(val classPath: ClassFileLookup[AbstractFile], val isJav classNode } match { case Some(node) => Right(node) - case None => Left(ClassNotFound(internalName, isJavaSourceDefined(internalName))) + case None => Left(ClassNotFound(internalName, javaDefinedClasses(internalName))) } } } -- cgit v1.2.3 From 0ed745364891e4e5f78e51ac9f3aad111b5c7bb3 Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Mon, 17 Aug 2015 14:35:22 +0200 Subject: Group call graph by method Change the hash maps in the call graph to index callsites and closure instantiations by the containing method. This is beneficial for implementing inliner heuristics and the closure optimizer. --- .../tools/nsc/backend/jvm/opt/CallGraph.scala | 80 ++++++++++++++++----- .../nsc/backend/jvm/opt/ClosureOptimizer.scala | 21 ++---- .../scala/tools/nsc/backend/jvm/opt/Inliner.scala | 68 ++++++++---------- .../tools/nsc/backend/jvm/opt/CallGraphTest.scala | 82 +++++++++------------- .../tools/nsc/backend/jvm/opt/InlinerTest.scala | 8 +-- 5 files changed, 138 insertions(+), 121 deletions(-) (limited to 'src') diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala index 96455c0e38..e11ae08a8c 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala @@ -11,7 +11,7 @@ import scala.reflect.internal.util.{NoPosition, Position} import scala.tools.asm.tree.analysis.{Value, Analyzer, BasicInterpreter} import scala.tools.asm.{Opcodes, Type, Handle} import scala.tools.asm.tree._ -import scala.collection.concurrent +import scala.collection.{concurrent, mutable} import scala.collection.convert.decorateAsScala._ import scala.tools.nsc.backend.jvm.BTypes.InternalName import scala.tools.nsc.backend.jvm.BackendReporting._ @@ -22,25 +22,41 @@ import BytecodeUtils._ class CallGraph[BT <: BTypes](val btypes: BT) { import btypes._ - val callsites: concurrent.Map[MethodInsnNode, Callsite] = recordPerRunCache(concurrent.TrieMap.empty) + /** + * The call graph contains the callsites in the program being compiled. + * + * Indexing the call graph by the containing MethodNode and the invocation MethodInsnNode allows + * finding callsites efficiently. For example, an inlining heuristic might want to know all + * callsites withing a callee method. + * + * Note that call graph entries (Callsite instances) keep a reference to the invocation + * MethodInsnNode, which keeps all AbstractInsnNodes of the method reachable. Adding classes + * from the classpath to the call graph (in addition to classes being compiled) may prevent + * method instruction nodes from being GCd. The ByteCodeRepository has a fixed size cache for + * parsed ClassNodes - keeping all ClassNodes alive consumed too much memory. + * The call graph is less problematic because only methods being called are kept alive, not entire + * classes. But we should keep an eye on this. + */ + val callsites: mutable.Map[MethodNode, Map[MethodInsnNode, Callsite]] = recordPerRunCache(concurrent.TrieMap.empty withDefaultValue Map.empty) - val closureInstantiations: concurrent.Map[InvokeDynamicInsnNode, ClosureInstantiation] = recordPerRunCache(concurrent.TrieMap.empty) + /** + * Closure instantiations in the program being compiled. + * + * Indexing closure instantiations by the containing MethodNode is beneficial for the closure + * optimizer: finding callsites to re-write requires running a producers-consumers analysis on + * the method. Here the closure instantiations are already grouped by method. + */ + val closureInstantiations: mutable.Map[MethodNode, Map[InvokeDynamicInsnNode, ClosureInstantiation]] = recordPerRunCache(concurrent.TrieMap.empty withDefaultValue Map.empty) def addClass(classNode: ClassNode): Unit = { val classType = classBTypeFromClassNode(classNode) - for { - m <- classNode.methods.asScala - (calls, closureInits) = analyzeCallsites(m, classType) - } { - calls foreach (callsite => callsites(callsite.callsiteInstruction) = callsite) - closureInits foreach (lmf => closureInstantiations(lmf.indy) = ClosureInstantiation(lmf, m, classType)) - } + classNode.methods.asScala.foreach(addMethod(_, classType)) } /** * Returns a list of callsites in the method, plus a list of closure instantiation indy instructions. */ - def analyzeCallsites(methodNode: MethodNode, definingClass: ClassBType): (List[Callsite], List[LambdaMetaFactoryCall]) = { + def addMethod(methodNode: MethodNode, definingClass: ClassBType): Unit = { case class CallsiteInfo(safeToInline: Boolean, safeToRewrite: Boolean, annotatedInline: Boolean, annotatedNoInline: Boolean, @@ -128,8 +144,8 @@ class CallGraph[BT <: BTypes](val btypes: BT) { case _ => false } - val callsites = new collection.mutable.ListBuffer[Callsite] - val closureInstantiations = new collection.mutable.ListBuffer[LambdaMetaFactoryCall] + var methodCallsites = Map.empty[MethodInsnNode, Callsite] + var methodClosureInstantiations = Map.empty[InvokeDynamicInsnNode, ClosureInstantiation] methodNode.instructions.iterator.asScala foreach { case call: MethodInsnNode => @@ -162,7 +178,7 @@ class CallGraph[BT <: BTypes](val btypes: BT) { receiverNotNullByAnalysis(call, numArgs) } - callsites += Callsite( + methodCallsites += call -> Callsite( callsiteInstruction = call, callsiteMethod = methodNode, callsiteClass = definingClass, @@ -174,15 +190,45 @@ class CallGraph[BT <: BTypes](val btypes: BT) { ) case LambdaMetaFactoryCall(indy, samMethodType, implMethod, instantiatedMethodType) => - closureInstantiations += LambdaMetaFactoryCall(indy, samMethodType, implMethod, instantiatedMethodType) + methodClosureInstantiations += indy -> ClosureInstantiation( + LambdaMetaFactoryCall(indy, samMethodType, implMethod, instantiatedMethodType), + methodNode, + definingClass) case _ => } - (callsites.toList, closureInstantiations.toList) + callsites(methodNode) = methodCallsites + closureInstantiations(methodNode) = methodClosureInstantiations } - /** + def removeCallsite(invocation: MethodInsnNode, methodNode: MethodNode): Option[Callsite] = { + val methodCallsites = callsites(methodNode) + val newCallsites = methodCallsites - invocation + if (newCallsites.isEmpty) callsites.remove(methodNode) + else callsites(methodNode) = newCallsites + methodCallsites.get(invocation) + } + + def addCallsite(callsite: Callsite): Unit = { + val methodCallsites = callsites(callsite.callsiteMethod) + callsites(callsite.callsiteMethod) = methodCallsites + (callsite.callsiteInstruction -> callsite) + } + + def removeClosureInstantiation(indy: InvokeDynamicInsnNode, methodNode: MethodNode): Option[ClosureInstantiation] = { + val methodClosureInits = closureInstantiations(methodNode) + val newClosureInits = methodClosureInits - indy + if (newClosureInits.isEmpty) closureInstantiations.remove(methodNode) + else closureInstantiations(methodNode) = newClosureInits + methodClosureInits.get(indy) + } + + def addClosureInstantiation(closureInit: ClosureInstantiation) = { + val methodClosureInits = closureInstantiations(closureInit.ownerMethod) + closureInstantiations(closureInit.ownerMethod) = methodClosureInits + (closureInit.lambdaMetaFactoryCall.indy -> closureInit) + } + + /** * A callsite in the call graph. * * @param callsiteInstruction The invocation instruction diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala index 92b9b34006..3fbc374a93 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala @@ -70,23 +70,16 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { } } - // Grouping the closure instantiations by method allows running the ProdConsAnalyzer only once per - // method. Also sort the instantiations: If there are multiple closure instantiations in a method, - // closure invocations need to be re-written in a consistent order for bytecode stability. The local - // variable slots for storing captured values depends on the order of rewriting. - val closureInstantiationsByMethod: Map[MethodNode, immutable.TreeSet[ClosureInstantiation]] = { - closureInstantiations.values.groupBy(_.ownerMethod).mapValues(immutable.TreeSet.empty ++ _) - } - // For each closure instantiation, a list of callsites of the closure that can be re-written // If a callsite cannot be rewritten, for example because the lambda body method is not accessible, // a warning is returned instead. val callsitesToRewrite: List[(ClosureInstantiation, List[Either[RewriteClosureApplyToClosureBodyFailed, (MethodInsnNode, Int)]])] = { - closureInstantiationsByMethod.iterator.flatMap({ + closureInstantiations.iterator.flatMap({ case (methodNode, closureInits) => // A lazy val to ensure the analysis only runs if necessary (the value is passed by name to `closureCallsites`) - lazy val prodCons = new ProdConsAnalyzer(methodNode, closureInits.head.ownerClass.internalName) - closureInits.iterator.map(init => (init, closureCallsites(init, prodCons))) + lazy val prodCons = new ProdConsAnalyzer(methodNode, closureInits.valuesIterator.next.ownerClass.internalName) + val sortedInits = immutable.TreeSet.empty ++ closureInits.values + sortedInits.iterator.map(init => (init, closureCallsites(init, prodCons))) }).toList // mapping to a list (not a map) to keep the sorting of closureInstantiationsByMethod } @@ -162,7 +155,7 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { isAccessible } - def pos = callGraph.callsites.get(invocation).map(_.callsitePosition).getOrElse(NoPosition) + def pos = callGraph.callsites(ownerMethod).get(invocation).map(_.callsitePosition).getOrElse(NoPosition) val stackSize: Either[RewriteClosureApplyToClosureBodyFailed, Int] = bodyAccessible match { case Left(w) => Left(RewriteClosureAccessCheckFailed(pos, w)) case Right(false) => Left(RewriteClosureIllegalAccess(pos, ownerClass.internalName)) @@ -237,7 +230,7 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { ownerMethod.instructions.remove(invocation) // update the call graph - val originalCallsite = callGraph.callsites.remove(invocation) + val originalCallsite = callGraph.removeCallsite(invocation, ownerMethod) // the method node is needed for building the call graph entry val bodyMethod = byteCodeRepository.methodNode(lambdaBodyHandle.getOwner, lambdaBodyHandle.getName, lambdaBodyHandle.getDesc) @@ -266,7 +259,7 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { // (corresponding to the receiver) must be non-null" // Explanation: If the lambda body method is non-static, the receiver is a captured // value. It can only be captured within some instance method, so we know it's non-null. - callGraph.callsites(bodyInvocation) = bodyMethodCallsite + callGraph.addCallsite(bodyMethodCallsite) } /** diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala index 8477f5461a..c5d2d212f4 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala @@ -27,8 +27,8 @@ class Inliner[BT <: BTypes](val btypes: BT) { def eliminateUnreachableCodeAndUpdateCallGraph(methodNode: MethodNode, definingClass: InternalName): Unit = { localOpt.minimalRemoveUnreachableCode(methodNode, definingClass) foreach { - case invocation: MethodInsnNode => callGraph.callsites.remove(invocation) - case indy: InvokeDynamicInsnNode => callGraph.closureInstantiations.remove(indy) + case invocation: MethodInsnNode => callGraph.removeCallsite(invocation, methodNode) + case indy: InvokeDynamicInsnNode => callGraph.removeClosureInstantiation(indy, methodNode) case _ => } } @@ -48,7 +48,7 @@ class Inliner[BT <: BTypes](val btypes: BT) { // DCE above removes unreachable callsites from the call graph. If the inlining request denotes // such an eliminated callsite, do nothing. - if (callGraph.callsites contains request.callsiteInstruction) { + if (callGraph.callsites(request.callsiteMethod).contains(request.callsiteInstruction)) { val r = inline(request.callsiteInstruction, request.callsiteStackHeight, request.callsiteMethod, request.callsiteClass, callee.callee, callee.calleeDeclarationClass, request.receiverKnownNotNull, keepLineNumbers = false) @@ -90,7 +90,7 @@ class Inliner[BT <: BTypes](val btypes: BT) { * requests is allowed to have cycles, and the callsites can appear in any order. */ def selectCallsitesForInlining: List[Callsite] = { - callsites.valuesIterator.filter({ + callsites.valuesIterator.flatMap(_.valuesIterator.filter({ case callsite @ Callsite(_, _, _, Right(Callee(callee, calleeDeclClass, safeToInline, _, annotatedInline, _, warning)), _, _, _, pos) => val res = doInlineCallsite(callsite) @@ -118,7 +118,7 @@ class Inliner[BT <: BTypes](val btypes: BT) { if (warning.emitWarning(compilerSettings)) backendReporting.inlinerWarning(pos, s"failed to determine if ${ins.name} should be inlined:\n$warning") false - }).toList + })).toList } /** @@ -136,7 +136,7 @@ class Inliner[BT <: BTypes](val btypes: BT) { // Rewriting final trait method callsites to the implementation class enables inlining. // We cannot just iterate over the values of the `callsites` map because the rewrite changes the // map. Therefore we first copy the values to a list. - callsites.values.toList.foreach(rewriteFinalTraitMethodInvocation) + callsites.valuesIterator.flatMap(_.valuesIterator).toList.foreach(rewriteFinalTraitMethodInvocation) } /** @@ -202,7 +202,7 @@ class Inliner[BT <: BTypes](val btypes: BT) { callsite.callsiteMethod.instructions.insert(callsite.callsiteInstruction, newCallsiteInstruction) callsite.callsiteMethod.instructions.remove(callsite.callsiteInstruction) - callGraph.callsites.remove(callsite.callsiteInstruction) + callGraph.removeCallsite(callsite.callsiteInstruction, callsite.callsiteMethod) val staticCallsite = Callsite( callsiteInstruction = newCallsiteInstruction, callsiteMethod = callsite.callsiteMethod, @@ -220,13 +220,13 @@ class Inliner[BT <: BTypes](val btypes: BT) { receiverKnownNotNull = callsite.receiverKnownNotNull, callsitePosition = callsite.callsitePosition ) - callGraph.callsites(newCallsiteInstruction) = staticCallsite + callGraph.addCallsite(staticCallsite) } for (warning <- res.left) { val Right(callee) = callsite.callee val newCallee = callee.copy(calleeInfoWarning = Some(RewriteTraitCallToStaticImplMethodFailed(calleeDeclarationClass.internalName, callee.callee.name, callee.callee.desc, warning))) - callGraph.callsites(callsite.callsiteInstruction) = callsite.copy(callee = Right(newCallee)) + callGraph.addCallsite(callsite.copy(callee = Right(newCallee))) } } } @@ -435,38 +435,30 @@ class Inliner[BT <: BTypes](val btypes: BT) { callsiteMethod.tryCatchBlocks.addAll(cloneTryCatchBlockNodes(callee, labelsMap).asJava) // Add all invocation instructions and closure instantiations that were inlined to the call graph - callee.instructions.iterator().asScala foreach { - case originalCallsiteIns: MethodInsnNode => - callGraph.callsites.get(originalCallsiteIns) match { - case Some(originalCallsite) => - val newCallsiteIns = instructionMap(originalCallsiteIns).asInstanceOf[MethodInsnNode] - callGraph.callsites(newCallsiteIns) = Callsite( - callsiteInstruction = newCallsiteIns, - callsiteMethod = callsiteMethod, - callsiteClass = callsiteClass, - callee = originalCallsite.callee, - argInfos = Nil, // TODO: re-compute argInfos for new destination (once we actually compute them) - callsiteStackHeight = callsiteStackHeight + originalCallsite.callsiteStackHeight, - receiverKnownNotNull = originalCallsite.receiverKnownNotNull, - callsitePosition = originalCallsite.callsitePosition - ) - - case None => - } - - case indy: InvokeDynamicInsnNode => - callGraph.closureInstantiations.get(indy) match { - case Some(closureInit) => - val newIndy = instructionMap(indy).asInstanceOf[InvokeDynamicInsnNode] - callGraph.closureInstantiations(newIndy) = ClosureInstantiation(closureInit.lambdaMetaFactoryCall.copy(indy = newIndy), callsiteMethod, callsiteClass) - - case None => - } + callGraph.callsites(callee).valuesIterator foreach { originalCallsite => + val newCallsiteIns = instructionMap(originalCallsite.callsiteInstruction).asInstanceOf[MethodInsnNode] + callGraph.addCallsite(Callsite( + callsiteInstruction = newCallsiteIns, + callsiteMethod = callsiteMethod, + callsiteClass = callsiteClass, + callee = originalCallsite.callee, + argInfos = Nil, // TODO: re-compute argInfos for new destination (once we actually compute them) + callsiteStackHeight = callsiteStackHeight + originalCallsite.callsiteStackHeight, + receiverKnownNotNull = originalCallsite.receiverKnownNotNull, + callsitePosition = originalCallsite.callsitePosition + ) + ) + } - case _ => + callGraph.closureInstantiations(callee).valuesIterator foreach { originalClosureInit => + val newIndy = instructionMap(originalClosureInit.lambdaMetaFactoryCall.indy).asInstanceOf[InvokeDynamicInsnNode] + callGraph.addClosureInstantiation( + ClosureInstantiation(originalClosureInit.lambdaMetaFactoryCall.copy(indy = newIndy), callsiteMethod, callsiteClass) + ) } + // Remove the elided invocation from the call graph - callGraph.callsites.remove(callsiteInstruction) + callGraph.removeCallsite(callsiteInstruction, callsiteMethod) // Inlining a method body can render some code unreachable, see example above (in runInliner). unreachableCodeEliminated -= callsiteMethod diff --git a/test/junit/scala/tools/nsc/backend/jvm/opt/CallGraphTest.scala b/test/junit/scala/tools/nsc/backend/jvm/opt/CallGraphTest.scala index 9fda034a04..45ef810796 100644 --- a/test/junit/scala/tools/nsc/backend/jvm/opt/CallGraphTest.scala +++ b/test/junit/scala/tools/nsc/backend/jvm/opt/CallGraphTest.scala @@ -94,59 +94,45 @@ class CallGraphTest { val List(cf1Call, cf2Call, cf3Call, cf4Call, cf5Call, cf6Call, cf7Call, cg1Call) = callsInMethod(t1) val List(df1Call, df2Call, df3Call, df4Call, df5Call, df6Call, df7Call, dg1Call) = callsInMethod(t2) - def checkCallsite(callsite: callGraph.Callsite, - call: MethodInsnNode, callsiteMethod: MethodNode, target: MethodNode, calleeDeclClass: ClassBType, - safeToInline: Boolean, atInline: Boolean, atNoInline: Boolean) = try { - assert(callsite.callsiteInstruction == call) - assert(callsite.callsiteMethod == callsiteMethod) - val callee = callsite.callee.get - assert(callee.callee == target) - assert(callee.calleeDeclarationClass == calleeDeclClass) - assert(callee.safeToInline == safeToInline) - assert(callee.annotatedInline == atInline) - assert(callee.annotatedNoInline == atNoInline) - - assert(callsite.argInfos == List()) // not defined yet - } catch { - case e: Throwable => println(callsite); throw e + def checkCallsite(call: MethodInsnNode, callsiteMethod: MethodNode, target: MethodNode, calleeDeclClass: ClassBType, + safeToInline: Boolean, atInline: Boolean, atNoInline: Boolean) = { + val callsite = callGraph.callsites(callsiteMethod)(call) + try { + assert(callsite.callsiteInstruction == call) + assert(callsite.callsiteMethod == callsiteMethod) + val callee = callsite.callee.get + assert(callee.callee == target) + assert(callee.calleeDeclarationClass == calleeDeclClass) + assert(callee.safeToInline == safeToInline) + assert(callee.annotatedInline == atInline) + assert(callee.annotatedNoInline == atNoInline) + + assert(callsite.argInfos == List()) // not defined yet + } catch { + case e: Throwable => println(callsite); throw e + } } val cClassBType = classBTypeFromClassNode(cCls) val cMClassBType = classBTypeFromClassNode(cMod) val dClassBType = classBTypeFromClassNode(dCls) - checkCallsite(callGraph.callsites(cf1Call), - cf1Call, t1, cf1, cClassBType, false, false, false) - checkCallsite(callGraph.callsites(cf2Call), - cf2Call, t1, cf2, cClassBType, true, false, false) - checkCallsite(callGraph.callsites(cf3Call), - cf3Call, t1, cf3, cClassBType, false, true, false) - checkCallsite(callGraph.callsites(cf4Call), - cf4Call, t1, cf4, cClassBType, true, true, false) - checkCallsite(callGraph.callsites(cf5Call), - cf5Call, t1, cf5, cClassBType, false, false, true) - checkCallsite(callGraph.callsites(cf6Call), - cf6Call, t1, cf6, cClassBType, true, false, true) - checkCallsite(callGraph.callsites(cf7Call), - cf7Call, t1, cf7, cClassBType, false, true, true) - checkCallsite(callGraph.callsites(cg1Call), - cg1Call, t1, g1, cMClassBType, true, false, false) - - checkCallsite(callGraph.callsites(df1Call), - df1Call, t2, df1, dClassBType, false, true, false) - checkCallsite(callGraph.callsites(df2Call), - df2Call, t2, cf2, cClassBType, true, false, false) - checkCallsite(callGraph.callsites(df3Call), - df3Call, t2, df3, dClassBType, true, false, false) - checkCallsite(callGraph.callsites(df4Call), - df4Call, t2, cf4, cClassBType, true, true, false) - checkCallsite(callGraph.callsites(df5Call), - df5Call, t2, cf5, cClassBType, false, false, true) - checkCallsite(callGraph.callsites(df6Call), - df6Call, t2, cf6, cClassBType, true, false, true) - checkCallsite(callGraph.callsites(df7Call), - df7Call, t2, cf7, cClassBType, false, true, true) - checkCallsite(callGraph.callsites(dg1Call), - dg1Call, t2, g1, cMClassBType, true, false, false) + checkCallsite(cf1Call, t1, cf1, cClassBType, false, false, false) + checkCallsite(cf2Call, t1, cf2, cClassBType, true, false, false) + checkCallsite(cf3Call, t1, cf3, cClassBType, false, true, false) + checkCallsite(cf4Call, t1, cf4, cClassBType, true, true, false) + checkCallsite(cf5Call, t1, cf5, cClassBType, false, false, true) + checkCallsite(cf6Call, t1, cf6, cClassBType, true, false, true) + checkCallsite(cf7Call, t1, cf7, cClassBType, false, true, true) + checkCallsite(cg1Call, t1, g1, cMClassBType, true, false, false) + + checkCallsite(df1Call, t2, df1, dClassBType, false, true, false) + checkCallsite(df2Call, t2, cf2, cClassBType, true, false, false) + checkCallsite(df3Call, t2, df3, dClassBType, true, false, false) + checkCallsite(df4Call, t2, cf4, cClassBType, true, true, false) + checkCallsite(df5Call, t2, cf5, cClassBType, false, false, true) + checkCallsite(df6Call, t2, cf6, cClassBType, true, false, true) + checkCallsite(df7Call, t2, cf7, cClassBType, false, true, true) + checkCallsite(dg1Call, t2, g1, cMClassBType, true, false, false) } } diff --git a/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala b/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala index 617eced560..baba1f3c95 100644 --- a/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala +++ b/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala @@ -273,7 +273,7 @@ class InlinerTest extends ClearAfterClass { assert(gIns contains invokeG, gIns) // f is inlined into g, g invokes itself recursively assert(callGraph.callsites.size == 3, callGraph.callsites) - for (callsite <- callGraph.callsites.values if methods.contains(callsite.callsiteMethod)) { + for (callsite <- callGraph.callsites.valuesIterator.flatMap(_.valuesIterator) if methods.contains(callsite.callsiteMethod)) { checkCallsite(callsite, g) } } @@ -295,8 +295,8 @@ class InlinerTest extends ClearAfterClass { assert(gIns.count(_ == invokeG) == 2, gIns) assert(hIns.count(_ == invokeG) == 2, hIns) - assert(callGraph.callsites.size == 7, callGraph.callsites) - for (callsite <- callGraph.callsites.values if methods.contains(callsite.callsiteMethod)) { + assert(callGraph.callsites.valuesIterator.flatMap(_.valuesIterator).size == 7, callGraph.callsites) + for (callsite <- callGraph.callsites.valuesIterator.flatMap(_.valuesIterator) if methods.contains(callsite.callsiteMethod)) { checkCallsite(callsite, g) } } @@ -336,7 +336,7 @@ class InlinerTest extends ClearAfterClass { |} """.stripMargin val List(c) = compile(code) - assert(callGraph.callsites.values exists (_.callsiteInstruction.name == "clone")) + assert(callGraph.callsites.valuesIterator.flatMap(_.valuesIterator) exists (_.callsiteInstruction.name == "clone")) } @Test -- cgit v1.2.3 From 14fc88b8fffe3338805afb5a4fb40843a14fda1c Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Mon, 17 Aug 2015 14:51:04 +0200 Subject: Move call graph and code repo building to the same place --- .../tools/nsc/backend/jvm/BCodeSkelBuilder.scala | 6 ------ .../scala/tools/nsc/backend/jvm/GenBCode.scala | 19 +++++++++++++------ .../scala/tools/nsc/settings/ScalaSettings.scala | 2 +- 3 files changed, 14 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/compiler/scala/tools/nsc/backend/jvm/BCodeSkelBuilder.scala b/src/compiler/scala/tools/nsc/backend/jvm/BCodeSkelBuilder.scala index a9b6a312e9..075a44ca22 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/BCodeSkelBuilder.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/BCodeSkelBuilder.scala @@ -9,7 +9,6 @@ package backend package jvm import scala.collection.{ mutable, immutable } -import scala.tools.nsc.backend.jvm.opt.ByteCodeRepository import scala.tools.nsc.symtab._ import scala.tools.asm @@ -140,11 +139,6 @@ abstract class BCodeSkelBuilder extends BCodeHelpers { if (AsmUtils.traceClassEnabled && cnode.name.contains(AsmUtils.traceClassPattern)) AsmUtils.traceClass(cnode) - if (settings.YoptAddToBytecodeRepository) { - // The inliner needs to find all classes in the code repo, also those being compiled - byteCodeRepository.add(cnode, ByteCodeRepository.CompilationUnit) - } - assert(cd.symbol == claszSymbol, "Someone messed up BCodePhase.claszSymbol during genPlainClass().") } // end of method genPlainClass() diff --git a/src/compiler/scala/tools/nsc/backend/jvm/GenBCode.scala b/src/compiler/scala/tools/nsc/backend/jvm/GenBCode.scala index a19b5f6e2c..e1a724f1cb 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/GenBCode.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/GenBCode.scala @@ -14,6 +14,7 @@ import scala.reflect.internal.util.Statistics import scala.tools.asm import scala.tools.asm.tree.ClassNode +import scala.tools.nsc.backend.jvm.opt.ByteCodeRepository /* * Prepare in-memory representations of classfiles using the ASM Tree API, and serialize them to disk. @@ -221,12 +222,18 @@ abstract class GenBCode extends BCodeSyncAndTry { class Worker2 { def runGlobalOptimizations(): Unit = { import scala.collection.convert.decorateAsScala._ - if (settings.YoptBuildCallGraph) { - q2.asScala foreach { - case Item2(_, _, plain, _, _) => - // skip mirror / bean: wd don't inline into tem, and they are not used in the plain class - if (plain != null) callGraph.addClass(plain) - } + + // add classes to the bytecode repo before building the call graph: the latter needs to + // look up classes and methods in the code repo. + if (settings.YoptAddToBytecodeRepository) q2.asScala foreach { + case Item2(_, mirror, plain, bean, _) => + if (mirror != null) byteCodeRepository.add(mirror, ByteCodeRepository.CompilationUnit) + if (plain != null) byteCodeRepository.add(plain, ByteCodeRepository.CompilationUnit) + if (bean != null) byteCodeRepository.add(bean, ByteCodeRepository.CompilationUnit) + } + if (settings.YoptBuildCallGraph) q2.asScala foreach { item => + // skip call graph for mirror / bean: wd don't inline into tem, and they are not used in the plain class + if (item.plain != null) callGraph.addClass(item.plain) } if (settings.YoptInlinerEnabled) bTypes.inliner.runInliner() diff --git a/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala b/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala index 3422167d02..f6223926bf 100644 --- a/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala +++ b/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala @@ -270,7 +270,7 @@ trait ScalaSettings extends AbsScalaSettings def YoptInlinerEnabled = YoptInlineProject || YoptInlineGlobal def YoptBuildCallGraph = YoptInlinerEnabled || YoptClosureElimination - def YoptAddToBytecodeRepository = YoptInlinerEnabled || YoptClosureElimination + def YoptAddToBytecodeRepository = YoptBuildCallGraph || YoptInlinerEnabled || YoptClosureElimination val YoptInlineHeuristics = ChoiceSetting( name = "-Yopt-inline-heuristics", -- cgit v1.2.3 From 3deb2242cba85a618da88dd98846290f359ab3a6 Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Tue, 18 Aug 2015 11:21:12 +0200 Subject: Separate hash maps in the code repo for classes being compiled or not Store classes being compiled in a separate hash map. This allows efficiently traversing all classes being compiled. It also simplifies limiting the size of the cache of class nodes parsed from classfiles. Also change the cache of class nodes parsed from classfiles to LRU instead of FIFO. --- .../nsc/backend/jvm/opt/ByteCodeRepository.scala | 61 +++++++++++++++------- .../tools/nsc/backend/jvm/opt/CallGraphTest.scala | 2 +- .../nsc/backend/jvm/opt/ClosureOptimizerTest.scala | 2 +- .../tools/nsc/backend/jvm/opt/InlineInfoTest.scala | 5 +- .../backend/jvm/opt/InlinerIllegalAccessTest.scala | 2 +- .../tools/nsc/backend/jvm/opt/InlinerTest.scala | 6 ++- 6 files changed, 54 insertions(+), 24 deletions(-) (limited to 'src') diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/ByteCodeRepository.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/ByteCodeRepository.scala index 8fc976a1c5..dd1d70c65f 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/ByteCodeRepository.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/ByteCodeRepository.scala @@ -10,6 +10,7 @@ package opt import scala.tools.asm import asm.tree._ import scala.collection.convert.decorateAsScala._ +import scala.collection.concurrent import scala.tools.asm.Attribute import scala.tools.nsc.backend.jvm.BackendReporting._ import scala.tools.nsc.io.AbstractFile @@ -29,38 +30,47 @@ class ByteCodeRepository[BT <: BTypes](val classPath: ClassFileLookup[AbstractFi import btypes._ /** - * Cache for parsed ClassNodes. Also stores the source of the bytecode: [[Classfile]] if read - * from `classPath`, [[CompilationUnit]] if the bytecode corresponds to a class being compiled. + * ClassNodes for classes being compiled in the current compilation run. + */ + val compilingClasses: concurrent.Map[InternalName, ClassNode] = recordPerRunCache(concurrent.TrieMap.empty) + + /** + * Cache for parsed ClassNodes. * The `Long` field encodes the age of the node in the map, which allows removing old entries when * the map grows too large (see limitCacheSize). * For Java classes in mixed compilation, the map contains an error message: no ClassNode is * generated by the backend and also no classfile that could be parsed. */ - val classes: collection.concurrent.Map[InternalName, Either[ClassNotFound, (ClassNode, Source, Long)]] = recordPerRunCache(collection.concurrent.TrieMap.empty) + val parsedClasses: concurrent.Map[InternalName, Either[ClassNotFound, (ClassNode, Long)]] = recordPerRunCache(concurrent.TrieMap.empty) private val maxCacheSize = 1500 private val targetSize = 500 - private val idCounter = new AtomicLong(0) + private object lruCounter extends AtomicLong(0l) with collection.generic.Clearable { + def clear(): Unit = { this.set(0l) } + } + recordPerRunCache(lruCounter) /** * Prevent the code repository from growing too large. Profiling reveals that the average size * of a ClassNode is about 30 kb. I observed having 17k+ classes in the cache, i.e., 500 mb. - * - * We can only remove classes with `Source == Classfile`, those can be parsed again if requested. */ private def limitCacheSize(): Unit = { - if (classes.count(c => c._2.isRight && c._2.right.get._2 == Classfile) > maxCacheSize) { - val removeId = idCounter.get - targetSize - val toRemove = classes.iterator.collect({ - case (name, Right((_, Classfile, id))) if id < removeId => name - }).toList - toRemove foreach classes.remove + if (parsedClasses.size > maxCacheSize) { + // OK if multiple threads get here + val minimalLRU = parsedClasses.valuesIterator.collect({ + case Right((_, lru)) => lru + }).toList.sorted(Ordering.Long.reverse).drop(targetSize).headOption.getOrElse(Long.MaxValue) + parsedClasses retain { + case (_, Right((_, lru))) => lru > minimalLRU + case _ => false + } } } def add(classNode: ClassNode, source: Source) = { - classes(classNode.name) = Right((classNode, source, idCounter.incrementAndGet())) + if (source == CompilationUnit) compilingClasses(classNode.name) = classNode + else parsedClasses(classNode.name) = Right((classNode, lruCounter.incrementAndGet())) } /** @@ -68,18 +78,32 @@ class ByteCodeRepository[BT <: BTypes](val classPath: ClassFileLookup[AbstractFi * parsed from the classfile on the compile classpath. */ def classNodeAndSource(internalName: InternalName): Either[ClassNotFound, (ClassNode, Source)] = { - val r = classes.getOrElseUpdate(internalName, { - limitCacheSize() - parseClass(internalName).map((_, Classfile, idCounter.incrementAndGet())) + classNode(internalName) map (n => { + val source = if (compilingClasses contains internalName) CompilationUnit else Classfile + (n, source) }) - r.map(v => (v._1, v._2)) } /** * The class node for an internal name. If the class node is not yet available, it is parsed from * the classfile on the compile classpath. */ - def classNode(internalName: InternalName): Either[ClassNotFound, ClassNode] = classNodeAndSource(internalName).map(_._1) + def classNode(internalName: InternalName): Either[ClassNotFound, ClassNode] = { + compilingClasses.get(internalName).map(Right(_)) getOrElse { + val r = parsedClasses.get(internalName) match { + case Some(l @ Left(_)) => l + case Some(r @ Right((classNode, _))) => + parsedClasses(internalName) = Right((classNode, lruCounter.incrementAndGet())) + r + case None => + limitCacheSize() + val res = parseClass(internalName).map((_, lruCounter.incrementAndGet())) + parsedClasses(internalName) = res + res + } + r.map(_._1) + } + } /** * The field node for a field matching `name` and `descriptor`, accessed in class `classInternalName`. @@ -90,7 +114,6 @@ class ByteCodeRepository[BT <: BTypes](val classPath: ClassFileLookup[AbstractFi */ def fieldNode(classInternalName: InternalName, name: String, descriptor: String): Either[FieldNotFound, (FieldNode, InternalName)] = { def fieldNodeImpl(parent: InternalName): Either[FieldNotFound, (FieldNode, InternalName)] = { - def msg = s"The field node $name$descriptor could not be found in class $classInternalName or any of its superclasses." classNode(parent) match { case Left(e) => Left(FieldNotFound(name, descriptor, classInternalName, Some(e))) case Right(c) => diff --git a/test/junit/scala/tools/nsc/backend/jvm/opt/CallGraphTest.scala b/test/junit/scala/tools/nsc/backend/jvm/opt/CallGraphTest.scala index 45ef810796..715db3f8c2 100644 --- a/test/junit/scala/tools/nsc/backend/jvm/opt/CallGraphTest.scala +++ b/test/junit/scala/tools/nsc/backend/jvm/opt/CallGraphTest.scala @@ -28,7 +28,7 @@ class CallGraphTest { import compiler.genBCode.bTypes._ // allows inspecting the caches after a compilation run - val notPerRun: List[Clearable] = List(classBTypeFromInternalName, byteCodeRepository.classes, callGraph.callsites) + val notPerRun: List[Clearable] = List(classBTypeFromInternalName, byteCodeRepository.compilingClasses, byteCodeRepository.parsedClasses, callGraph.callsites) notPerRun foreach compiler.perRunCaches.unrecordCache def compile(code: String, allowMessage: StoreReporter#Info => Boolean): List[ClassNode] = { diff --git a/test/junit/scala/tools/nsc/backend/jvm/opt/ClosureOptimizerTest.scala b/test/junit/scala/tools/nsc/backend/jvm/opt/ClosureOptimizerTest.scala index 69eed1f75d..258813ea68 100644 --- a/test/junit/scala/tools/nsc/backend/jvm/opt/ClosureOptimizerTest.scala +++ b/test/junit/scala/tools/nsc/backend/jvm/opt/ClosureOptimizerTest.scala @@ -29,7 +29,7 @@ import scala.collection.convert.decorateAsScala._ import scala.tools.testing.ClearAfterClass object ClosureOptimizerTest extends ClearAfterClass.Clearable { - var compiler = newCompiler(extraArgs = "-Yopt:l:classpath -Yopt-warnings") + var compiler = newCompiler(extraArgs = "-Yopt:l:classpath -Yopt-warnings:_") def clear(): Unit = { compiler = null } } diff --git a/test/junit/scala/tools/nsc/backend/jvm/opt/InlineInfoTest.scala b/test/junit/scala/tools/nsc/backend/jvm/opt/InlineInfoTest.scala index 5ccb940415..c25933e63e 100644 --- a/test/junit/scala/tools/nsc/backend/jvm/opt/InlineInfoTest.scala +++ b/test/junit/scala/tools/nsc/backend/jvm/opt/InlineInfoTest.scala @@ -22,7 +22,10 @@ object InlineInfoTest extends ClearAfterClass.Clearable { var compiler = newCompiler(extraArgs = "-Ybackend:GenBCode -Yopt:l:classpath") def clear(): Unit = { compiler = null } - def notPerRun: List[Clearable] = List(compiler.genBCode.bTypes.classBTypeFromInternalName, compiler.genBCode.bTypes.byteCodeRepository.classes) + def notPerRun: List[Clearable] = List( + compiler.genBCode.bTypes.classBTypeFromInternalName, + compiler.genBCode.bTypes.byteCodeRepository.compilingClasses, + compiler.genBCode.bTypes.byteCodeRepository.parsedClasses) notPerRun foreach compiler.perRunCaches.unrecordCache } diff --git a/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerIllegalAccessTest.scala b/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerIllegalAccessTest.scala index 7ed0e13226..f1be44a094 100644 --- a/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerIllegalAccessTest.scala +++ b/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerIllegalAccessTest.scala @@ -67,7 +67,7 @@ class InlinerIllegalAccessTest extends ClearAfterClass { check(dClass, assertEmpty) check(eClass, assertEmpty) // C is public, so accessible in E - byteCodeRepository.classes.clear() + byteCodeRepository.parsedClasses.clear() classBTypeFromInternalName.clear() cClass.access &= ~ACC_PUBLIC // ftw diff --git a/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala b/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala index baba1f3c95..c6313a84d2 100644 --- a/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala +++ b/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala @@ -33,7 +33,11 @@ object InlinerTest extends ClearAfterClass.Clearable { var compiler = newCompiler(extraArgs = args) // allows inspecting the caches after a compilation run - def notPerRun: List[Clearable] = List(compiler.genBCode.bTypes.classBTypeFromInternalName, compiler.genBCode.bTypes.byteCodeRepository.classes, compiler.genBCode.bTypes.callGraph.callsites) + def notPerRun: List[Clearable] = List( + compiler.genBCode.bTypes.classBTypeFromInternalName, + compiler.genBCode.bTypes.byteCodeRepository.compilingClasses, + compiler.genBCode.bTypes.byteCodeRepository.parsedClasses, + compiler.genBCode.bTypes.callGraph.callsites) notPerRun foreach compiler.perRunCaches.unrecordCache def clear(): Unit = { compiler = null } -- cgit v1.2.3 From 617c0923bd6b5d1642d04a03508f063d503776b6 Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Thu, 20 Aug 2015 12:28:30 +0200 Subject: Store SAM information in ClassBTypes If a class (trait) is a SAM type, store the name and descriptor of the SAM in the ClassBType's InlineInfo. --- .../tools/nsc/backend/jvm/BCodeAsmCommon.scala | 13 +- .../scala/tools/nsc/backend/jvm/BTypes.scala | 8 +- .../tools/nsc/backend/jvm/BTypesFromSymbols.scala | 6 +- .../nsc/backend/jvm/opt/ByteCodeRepository.scala | 5 + .../nsc/backend/jvm/opt/InlineInfoAttribute.scala | 40 ++++-- .../scala/tools/nsc/backend/jvm/opt/Inliner.scala | 2 + .../nsc/backend/jvm/opt/InlinerHeuristics.scala | 146 +++++++++++++++++++++ .../scala/reflect/internal/Definitions.scala | 4 +- .../nsc/backend/jvm/opt/ScalaInlineInfoTest.scala | 54 +++++++- 9 files changed, 255 insertions(+), 23 deletions(-) create mode 100644 src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala (limited to 'src') diff --git a/src/compiler/scala/tools/nsc/backend/jvm/BCodeAsmCommon.scala b/src/compiler/scala/tools/nsc/backend/jvm/BCodeAsmCommon.scala index 93f5159f89..42738d3e1c 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/BCodeAsmCommon.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/BCodeAsmCommon.scala @@ -390,6 +390,17 @@ final class BCodeAsmCommon[G <: Global](val global: G) { val isEffectivelyFinal = classSym.isEffectivelyFinal + val sam = { + if (classSym.isImplClass || classSym.isEffectivelyFinal) None + else { + // Phase travel necessary. For example, nullary methods (getter of an abstract val) get an + // empty parameter list in later phases and would therefore be picked as SAM. + val samSym = exitingPickler(definitions.findSam(classSym.tpe)) + if (samSym == NoSymbol) None + else Some(samSym.javaSimpleName.toString + methodSymToDescriptor(samSym)) + } + } + var warning = Option.empty[ClassSymbolInfoFailureSI9111] // Primitive methods cannot be inlined, so there's no point in building a MethodInlineInfo. Also, some @@ -447,7 +458,7 @@ final class BCodeAsmCommon[G <: Global](val global: G) { } }).toMap - InlineInfo(traitSelfType, isEffectivelyFinal, methodInlineInfos, warning) + InlineInfo(traitSelfType, isEffectivelyFinal, sam, methodInlineInfos, warning) } } diff --git a/src/compiler/scala/tools/nsc/backend/jvm/BTypes.scala b/src/compiler/scala/tools/nsc/backend/jvm/BTypes.scala index 55d181823b..12cd9c7f5c 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/BTypes.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/BTypes.scala @@ -234,6 +234,7 @@ abstract class BTypes { InlineInfo( traitImplClassSelfType = None, isEffectivelyFinal = BytecodeUtils.isFinalClass(classNode), + sam = inliner.heuristics.javaSam(classNode.name), methodInfos = methodInfos, warning) } @@ -1106,6 +1107,8 @@ object BTypes { * Metadata about a ClassBType, used by the inliner. * * More information may be added in the future to enable more elaborate inlinine heuristics. + * Note that this class should contain information that can only be obtained from the ClassSymbol. + * Information that can be computed from the ClassNode should be added to the call graph instead. * * @param traitImplClassSelfType `Some(tp)` if this InlineInfo describes a trait, and the `self` * parameter type of the methods in the implementation class is not @@ -1124,6 +1127,8 @@ object BTypes { * @param isEffectivelyFinal True if the class cannot have subclasses: final classes, module * classes, trait impl classes. * + * @param sam If this class is a SAM type, the SAM's "$name$descriptor". + * * @param methodInfos The [[MethodInlineInfo]]s for the methods declared in this class. * The map is indexed by the string s"$name$descriptor" (to * disambiguate overloads). @@ -1134,10 +1139,11 @@ object BTypes { */ final case class InlineInfo(traitImplClassSelfType: Option[InternalName], isEffectivelyFinal: Boolean, + sam: Option[String], methodInfos: Map[String, MethodInlineInfo], warning: Option[ClassInlineInfoWarning]) - val EmptyInlineInfo = InlineInfo(None, false, Map.empty, None) + val EmptyInlineInfo = InlineInfo(None, false, None, Map.empty, None) /** * Metadata about a method, used by the inliner. diff --git a/src/compiler/scala/tools/nsc/backend/jvm/BTypesFromSymbols.scala b/src/compiler/scala/tools/nsc/backend/jvm/BTypesFromSymbols.scala index 218cf0a416..160c1283f1 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/BTypesFromSymbols.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/BTypesFromSymbols.scala @@ -8,7 +8,7 @@ package backend.jvm import scala.tools.asm import scala.tools.nsc.backend.jvm.opt._ -import scala.tools.nsc.backend.jvm.BTypes.{InlineInfo, MethodInlineInfo, InternalName} +import scala.tools.nsc.backend.jvm.BTypes._ import BackendReporting._ import scala.tools.nsc.settings.ScalaSettings @@ -444,7 +444,7 @@ class BTypesFromSymbols[G <: Global](val global: G) extends BTypes { case Right(classNode) => inlineInfoFromClassfile(classNode) case Left(missingClass) => - InlineInfo(None, false, Map.empty, Some(ClassNotFoundWhenBuildingInlineInfoFromSymbol(missingClass))) + EmptyInlineInfo.copy(warning = Some(ClassNotFoundWhenBuildingInlineInfoFromSymbol(missingClass))) } } } @@ -467,7 +467,7 @@ class BTypesFromSymbols[G <: Global](val global: G) extends BTypes { flags = asm.Opcodes.ACC_SUPER | asm.Opcodes.ACC_PUBLIC | asm.Opcodes.ACC_FINAL, nestedClasses = nested, nestedInfo = None, - InlineInfo(None, true, Map.empty, None))) // no InlineInfo needed, scala never invokes methods on the mirror class + inlineInfo = EmptyInlineInfo.copy(isEffectivelyFinal = true))) // no method inline infos needed, scala never invokes methods on the mirror class c }) } diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/ByteCodeRepository.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/ByteCodeRepository.scala index dd1d70c65f..4492d0baf5 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/ByteCodeRepository.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/ByteCodeRepository.scala @@ -132,6 +132,11 @@ class ByteCodeRepository[BT <: BTypes](val classPath: ClassFileLookup[AbstractFi * The method node for a method matching `name` and `descriptor`, accessed in class `ownerInternalNameOrArrayDescriptor`. * The declaration of the method may be in one of the parents. * + * TODO: make sure we always return the right method, the one being invoked. write tests. + * - if there's an abstract and a concrete one. could possibly somehow the abstract be returned? + * - with traits and default methods, if there is more than one default method inherited and + * no override: what should be returned? We should not just inline one of the two. + * * @return The [[MethodNode]] of the requested method and the [[InternalName]] of its declaring * class, or an error message if the method could not be found. */ diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/InlineInfoAttribute.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/InlineInfoAttribute.scala index e7dd5abc57..c885a29e16 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/InlineInfoAttribute.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/InlineInfoAttribute.scala @@ -47,15 +47,22 @@ case class InlineInfoAttribute(inlineInfo: InlineInfo) extends Attribute(InlineI result.putByte(InlineInfoAttribute.VERSION) - var hasSelfIsFinal = 0 - if (inlineInfo.isEffectivelyFinal) hasSelfIsFinal |= 1 - if (inlineInfo.traitImplClassSelfType.isDefined) hasSelfIsFinal |= 2 - result.putByte(hasSelfIsFinal) + var finalSelfSam = 0 + if (inlineInfo.isEffectivelyFinal) finalSelfSam |= 1 + if (inlineInfo.traitImplClassSelfType.isDefined) finalSelfSam |= 2 + if (inlineInfo.sam.isDefined) finalSelfSam |= 4 + result.putByte(finalSelfSam) for (selfInternalName <- inlineInfo.traitImplClassSelfType) { result.putShort(cw.newUTF8(selfInternalName)) } + for (samNameDesc <- inlineInfo.sam) { + val (name, desc) = samNameDesc.span(_ != '(') + result.putShort(cw.newUTF8(name)) + result.putShort(cw.newUTF8(desc)) + } + // The method count fits in a short (the methods_count in a classfile is also a short) result.putShort(inlineInfo.methodInfos.size) @@ -94,15 +101,20 @@ case class InlineInfoAttribute(inlineInfo: InlineInfo) extends Attribute(InlineI val version = nextByte() if (version == 1) { - val hasSelfIsFinal = nextByte() - val isFinal = (hasSelfIsFinal & 1) != 0 - val hasSelf = (hasSelfIsFinal & 2) != 0 + val finalSelfSam = nextByte() + val isFinal = (finalSelfSam & 1) != 0 + val hasSelf = (finalSelfSam & 2) != 0 + val hasSam = (finalSelfSam & 4) != 0 - val self = if (hasSelf) { + val self = if (!hasSelf) None else { val selfName = nextUTF8() Some(selfName) - } else { - None + } + + val sam = if (!hasSam) None else { + val name = nextUTF8() + val desc = nextUTF8() + Some(name + desc) } val numEntries = nextShort() @@ -118,7 +130,7 @@ case class InlineInfoAttribute(inlineInfo: InlineInfo) extends Attribute(InlineI (name + desc, MethodInlineInfo(isFinal, traitMethodWithStaticImplementation, isInline, isNoInline)) }).toMap - InlineInfoAttribute(InlineInfo(self, isFinal, infos, None)) + InlineInfoAttribute(InlineInfo(self, isFinal, sam, infos, None)) } else { val msg = UnknownScalaInlineInfoVersion(cr.getClassName, version) InlineInfoAttribute(BTypes.EmptyInlineInfo.copy(warning = Some(msg))) @@ -129,8 +141,10 @@ case class InlineInfoAttribute(inlineInfo: InlineInfo) extends Attribute(InlineI object InlineInfoAttribute { /** * [u1] version - * [u1] isEffectivelyFinal (<< 0), hasTraitImplClassSelfType (<< 1) + * [u1] isEffectivelyFinal (<< 0), hasTraitImplClassSelfType (<< 1), hasSam (<< 2) * [u2]? traitImplClassSelfType (reference) + * [u2]? samName (reference) + * [u2]? samDescriptor (reference) * [u2] numMethodEntries * [u2] name (reference) * [u2] descriptor (reference) @@ -145,4 +159,4 @@ object InlineInfoAttribute { * In order to instruct the ASM framework to de-serialize the ScalaInlineInfo attribute, we need * to pass a prototype instance when running the class reader. */ -object InlineInfoAttributePrototype extends InlineInfoAttribute(InlineInfo(null, false, null, null)) +object InlineInfoAttributePrototype extends InlineInfoAttribute(InlineInfo(null, false, null, null, null)) diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala index c5d2d212f4..035be3acdd 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala @@ -25,6 +25,8 @@ class Inliner[BT <: BTypes](val btypes: BT) { import btypes._ import callGraph._ + val heuristics: InlinerHeuristics[btypes.type] = new InlinerHeuristics(btypes) + def eliminateUnreachableCodeAndUpdateCallGraph(methodNode: MethodNode, definingClass: InternalName): Unit = { localOpt.minimalRemoveUnreachableCode(methodNode, definingClass) foreach { case invocation: MethodInsnNode => callGraph.removeCallsite(invocation, methodNode) diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala new file mode 100644 index 0000000000..09bb59cd4d --- /dev/null +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala @@ -0,0 +1,146 @@ +/* NSC -- new Scala compiler + * Copyright 2005-2014 LAMP/EPFL + * @author Martin Odersky + */ + +package scala.tools.nsc +package backend.jvm +package opt + +import scala.tools.nsc.backend.jvm.BTypes.InternalName + +class InlinerHeuristics[BT <: BTypes](val bTypes: BT) { + import bTypes._ + + /* + // using http://lihaoyi.github.io/Ammonite/ + + load.ivy("com.google.guava" % "guava" % "18.0") + val javaUtilFunctionClasses = { + val rt = System.getProperty("sun.boot.class.path").split(":").find(_.endsWith("lib/rt.jar")).get + val u = new java.io.File(rt).toURL + val l = new java.net.URLClassLoader(Array(u)) + val cp = com.google.common.reflect.ClassPath.from(l) + cp.getTopLevelClasses("java.util.function").toArray.map(_.toString).toList + } + + // found using IntelliJ's "Find Usages" on the @FunctionalInterface annotation + val otherClasses = List( + "com.sun.javafx.css.parser.Recognizer", + "java.awt.KeyEventDispatcher", + "java.awt.KeyEventPostProcessor", + "java.io.FileFilter", + "java.io.FilenameFilter", + "java.lang.Runnable", + "java.lang.Thread$UncaughtExceptionHandler", + "java.nio.file.DirectoryStream$Filter", + "java.nio.file.PathMatcher", + "java.time.temporal.TemporalAdjuster", + "java.time.temporal.TemporalQuery", + "java.util.Comparator", + "java.util.concurrent.Callable", + "java.util.logging.Filter", + "java.util.prefs.PreferenceChangeListener", + "javafx.animation.Interpolatable", + "javafx.beans.InvalidationListener", + "javafx.beans.value.ChangeListener", + "javafx.collections.ListChangeListener", + "javafx.collections.MapChangeListener", + "javafx.collections.SetChangeListener", + "javafx.event.EventHandler", + "javafx.util.Builder", + "javafx.util.BuilderFactory", + "javafx.util.Callback" + ) + + val allClasses = javaUtilFunctionClasses ::: otherClasses + + load.ivy("org.ow2.asm" % "asm" % "5.0.4") + val classesAndSamNameDesc = allClasses.map(c => { + val cls = Class.forName(c) + val internalName = org.objectweb.asm.Type.getDescriptor(cls).drop(1).dropRight(1) // drop L and ; + val sams = cls.getMethods.filter(m => { + (m.getModifiers & java.lang.reflect.Modifier.ABSTRACT) != 0 && + m.getName != "equals" // Comparator has an abstract override of "equals" for adding Javadoc + }) + assert(sams.size == 1, internalName + sams.map(_.getName)) + val sam = sams.head + val samDesc = org.objectweb.asm.Type.getMethodDescriptor(sam) + (internalName, sam.getName, samDesc) + }) + println(classesAndSamNameDesc map { + case (cls, nme, desc) => s"""("$cls", "$nme$desc")""" + } mkString ("", ",\n", "\n")) + */ + private val javaSams: Map[String, String] = Map( + ("java/util/function/BiConsumer", "accept(Ljava/lang/Object;Ljava/lang/Object;)V"), + ("java/util/function/BiFunction", "apply(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;"), + ("java/util/function/BiPredicate", "test(Ljava/lang/Object;Ljava/lang/Object;)Z"), + ("java/util/function/BinaryOperator", "apply(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;"), + ("java/util/function/BooleanSupplier", "getAsBoolean()Z"), + ("java/util/function/Consumer", "accept(Ljava/lang/Object;)V"), + ("java/util/function/DoubleBinaryOperator", "applyAsDouble(DD)D"), + ("java/util/function/DoubleConsumer", "accept(D)V"), + ("java/util/function/DoubleFunction", "apply(D)Ljava/lang/Object;"), + ("java/util/function/DoublePredicate", "test(D)Z"), + ("java/util/function/DoubleSupplier", "getAsDouble()D"), + ("java/util/function/DoubleToIntFunction", "applyAsInt(D)I"), + ("java/util/function/DoubleToLongFunction", "applyAsLong(D)J"), + ("java/util/function/DoubleUnaryOperator", "applyAsDouble(D)D"), + ("java/util/function/Function", "apply(Ljava/lang/Object;)Ljava/lang/Object;"), + ("java/util/function/IntBinaryOperator", "applyAsInt(II)I"), + ("java/util/function/IntConsumer", "accept(I)V"), + ("java/util/function/IntFunction", "apply(I)Ljava/lang/Object;"), + ("java/util/function/IntPredicate", "test(I)Z"), + ("java/util/function/IntSupplier", "getAsInt()I"), + ("java/util/function/IntToDoubleFunction", "applyAsDouble(I)D"), + ("java/util/function/IntToLongFunction", "applyAsLong(I)J"), + ("java/util/function/IntUnaryOperator", "applyAsInt(I)I"), + ("java/util/function/LongBinaryOperator", "applyAsLong(JJ)J"), + ("java/util/function/LongConsumer", "accept(J)V"), + ("java/util/function/LongFunction", "apply(J)Ljava/lang/Object;"), + ("java/util/function/LongPredicate", "test(J)Z"), + ("java/util/function/LongSupplier", "getAsLong()J"), + ("java/util/function/LongToDoubleFunction", "applyAsDouble(J)D"), + ("java/util/function/LongToIntFunction", "applyAsInt(J)I"), + ("java/util/function/LongUnaryOperator", "applyAsLong(J)J"), + ("java/util/function/ObjDoubleConsumer", "accept(Ljava/lang/Object;D)V"), + ("java/util/function/ObjIntConsumer", "accept(Ljava/lang/Object;I)V"), + ("java/util/function/ObjLongConsumer", "accept(Ljava/lang/Object;J)V"), + ("java/util/function/Predicate", "test(Ljava/lang/Object;)Z"), + ("java/util/function/Supplier", "get()Ljava/lang/Object;"), + ("java/util/function/ToDoubleBiFunction", "applyAsDouble(Ljava/lang/Object;Ljava/lang/Object;)D"), + ("java/util/function/ToDoubleFunction", "applyAsDouble(Ljava/lang/Object;)D"), + ("java/util/function/ToIntBiFunction", "applyAsInt(Ljava/lang/Object;Ljava/lang/Object;)I"), + ("java/util/function/ToIntFunction", "applyAsInt(Ljava/lang/Object;)I"), + ("java/util/function/ToLongBiFunction", "applyAsLong(Ljava/lang/Object;Ljava/lang/Object;)J"), + ("java/util/function/ToLongFunction", "applyAsLong(Ljava/lang/Object;)J"), + ("java/util/function/UnaryOperator", "apply(Ljava/lang/Object;)Ljava/lang/Object;"), + ("com/sun/javafx/css/parser/Recognizer", "recognize(I)Z"), + ("java/awt/KeyEventDispatcher", "dispatchKeyEvent(Ljava/awt/event/KeyEvent;)Z"), + ("java/awt/KeyEventPostProcessor", "postProcessKeyEvent(Ljava/awt/event/KeyEvent;)Z"), + ("java/io/FileFilter", "accept(Ljava/io/File;)Z"), + ("java/io/FilenameFilter", "accept(Ljava/io/File;Ljava/lang/String;)Z"), + ("java/lang/Runnable", "run()V"), + ("java/lang/Thread$UncaughtExceptionHandler", "uncaughtException(Ljava/lang/Thread;Ljava/lang/Throwable;)V"), + ("java/nio/file/DirectoryStream$Filter", "accept(Ljava/lang/Object;)Z"), + ("java/nio/file/PathMatcher", "matches(Ljava/nio/file/Path;)Z"), + ("java/time/temporal/TemporalAdjuster", "adjustInto(Ljava/time/temporal/Temporal;)Ljava/time/temporal/Temporal;"), + ("java/time/temporal/TemporalQuery", "queryFrom(Ljava/time/temporal/TemporalAccessor;)Ljava/lang/Object;"), + ("java/util/Comparator", "compare(Ljava/lang/Object;Ljava/lang/Object;)I"), + ("java/util/concurrent/Callable", "call()Ljava/lang/Object;"), + ("java/util/logging/Filter", "isLoggable(Ljava/util/logging/LogRecord;)Z"), + ("java/util/prefs/PreferenceChangeListener", "preferenceChange(Ljava/util/prefs/PreferenceChangeEvent;)V"), + ("javafx/animation/Interpolatable", "interpolate(Ljava/lang/Object;D)Ljava/lang/Object;"), + ("javafx/beans/InvalidationListener", "invalidated(Ljavafx/beans/Observable;)V"), + ("javafx/beans/value/ChangeListener", "changed(Ljavafx/beans/value/ObservableValue;Ljava/lang/Object;Ljava/lang/Object;)V"), + ("javafx/collections/ListChangeListener", "onChanged(Ljavafx/collections/ListChangeListener$Change;)V"), + ("javafx/collections/MapChangeListener", "onChanged(Ljavafx/collections/MapChangeListener$Change;)V"), + ("javafx/collections/SetChangeListener", "onChanged(Ljavafx/collections/SetChangeListener$Change;)V"), + ("javafx/event/EventHandler", "handle(Ljavafx/event/Event;)V"), + ("javafx/util/Builder", "build()Ljava/lang/Object;"), + ("javafx/util/BuilderFactory", "getBuilder(Ljava/lang/Class;)Ljavafx/util/Builder;"), + ("javafx/util/Callback", "call(Ljava/lang/Object;)Ljava/lang/Object;") + ) + def javaSam(internalName: InternalName): Option[String] = javaSams.get(internalName) +} diff --git a/src/reflect/scala/reflect/internal/Definitions.scala b/src/reflect/scala/reflect/internal/Definitions.scala index 02fa3c882b..14487a941b 100644 --- a/src/reflect/scala/reflect/internal/Definitions.scala +++ b/src/reflect/scala/reflect/internal/Definitions.scala @@ -795,7 +795,9 @@ trait Definitions extends api.StandardDefinitions { * The class defining the method is a supertype of `tp` that * has a public no-arg primary constructor. */ - def samOf(tp: Type): Symbol = if (!settings.Xexperimental) NoSymbol else { + def samOf(tp: Type): Symbol = if (!settings.Xexperimental) NoSymbol else findSam(tp) + + def findSam(tp: Type): Symbol = { // if tp has a constructor, it must be public and must not take any arguments // (not even an implicit argument list -- to keep it simple for now) val tpSym = tp.typeSymbol diff --git a/test/junit/scala/tools/nsc/backend/jvm/opt/ScalaInlineInfoTest.scala b/test/junit/scala/tools/nsc/backend/jvm/opt/ScalaInlineInfoTest.scala index f8e887426b..c07d1fe3c4 100644 --- a/test/junit/scala/tools/nsc/backend/jvm/opt/ScalaInlineInfoTest.scala +++ b/test/junit/scala/tools/nsc/backend/jvm/opt/ScalaInlineInfoTest.scala @@ -9,20 +9,26 @@ import scala.tools.asm.Opcodes._ import org.junit.Assert._ import CodeGenTools._ +import scala.tools.asm.tree.ClassNode import scala.tools.nsc.backend.jvm.BTypes.{MethodInlineInfo, InlineInfo} import scala.tools.partest.ASMConverters import ASMConverters._ import scala.collection.convert.decorateAsScala._ +import scala.tools.testing.ClearAfterClass -object ScalaInlineInfoTest { +object ScalaInlineInfoTest extends ClearAfterClass.Clearable { var compiler = newCompiler(extraArgs = "-Ybackend:GenBCode -Yopt:l:none") def clear(): Unit = { compiler = null } } @RunWith(classOf[JUnit4]) -class ScalaInlineInfoTest { +class ScalaInlineInfoTest extends ClearAfterClass { + ClearAfterClass.stateToClear = ScalaInlineInfoTest + val compiler = newCompiler() + def inlineInfo(c: ClassNode): InlineInfo = c.attrs.asScala.collect({ case a: InlineInfoAttribute => a.inlineInfo }).head + @Test def traitMembersInlineInfo(): Unit = { val code = @@ -58,10 +64,11 @@ class ScalaInlineInfoTest { """.stripMargin val cs @ List(t, tl, to, tCls) = compileClasses(compiler)(code) - val List(info) = t.attrs.asScala.collect({ case a: InlineInfoAttribute => a.inlineInfo }).toList - val expect = InlineInfo( + val info = inlineInfo(t) + val expect = InlineInfo ( None, // self type false, // final class + None, // not a sam Map( ("O()LT$O$;", MethodInlineInfo(true, false,false,false)), ("T$$super$toString()Ljava/lang/String;",MethodInlineInfo(false,false,false,false)), @@ -82,4 +89,43 @@ class ScalaInlineInfoTest { ) assert(info == expect, info) } + + @Test + def inlineInfoSam(): Unit = { + val code = + """abstract class C { + | def f = 0 + | def g(x: Int): Int + | val foo = "hi" + |} + |abstract class D { + | val biz: Int + |} + |trait T { + | def h(a: String): Int + |} + |abstract class E extends T { + | def hihi(x: Int) = x + |} + |class F extends T { + | def h(a: String) = 0 + |} + |trait U { + | def conc() = 10 + | def nullary: Int + |} + """.stripMargin + val cs = compileClasses(compiler)(code) + val sams = cs.map(c => (c.name, inlineInfo(c).sam)) + assertEquals(sams, + List( + ("C",Some("g(I)I")), + ("D",None), + ("E",Some("h(Ljava/lang/String;)I")), + ("F",None), + ("T",Some("h(Ljava/lang/String;)I")), + ("U",None), + ("U$class",None))) + + } } -- cgit v1.2.3 From 40ef6d4abb1ee68c8f0f9579b40e228ac973d092 Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Thu, 20 Aug 2015 16:06:31 +0200 Subject: move current inliner heuristic to the InlinerHeuristics class --- .../scala/tools/nsc/backend/jvm/opt/Inliner.scala | 48 +-------------------- .../nsc/backend/jvm/opt/InlinerHeuristics.scala | 49 ++++++++++++++++++++++ 2 files changed, 50 insertions(+), 47 deletions(-) (limited to 'src') diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala index 035be3acdd..cc5a4de4e4 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala @@ -26,6 +26,7 @@ class Inliner[BT <: BTypes](val btypes: BT) { import callGraph._ val heuristics: InlinerHeuristics[btypes.type] = new InlinerHeuristics(btypes) + import heuristics._ def eliminateUnreachableCodeAndUpdateCallGraph(methodNode: MethodNode, definingClass: InternalName): Unit = { localOpt.minimalRemoveUnreachableCode(methodNode, definingClass) foreach { @@ -87,53 +88,6 @@ class Inliner[BT <: BTypes](val btypes: BT) { } } - /** - * Select callsites from the call graph that should be inlined. The resulting list of inlining - * requests is allowed to have cycles, and the callsites can appear in any order. - */ - def selectCallsitesForInlining: List[Callsite] = { - callsites.valuesIterator.flatMap(_.valuesIterator.filter({ - case callsite @ Callsite(_, _, _, Right(Callee(callee, calleeDeclClass, safeToInline, _, annotatedInline, _, warning)), _, _, _, pos) => - val res = doInlineCallsite(callsite) - - if (!res) { - if (annotatedInline && btypes.compilerSettings.YoptWarningEmitAtInlineFailed) { - // if the callsite is annotated @inline, we report an inline warning even if the underlying - // reason is, for example, mixed compilation (which has a separate -Yopt-warning flag). - def initMsg = s"${BackendReporting.methodSignature(calleeDeclClass.internalName, callee)} is annotated @inline but cannot be inlined" - def warnMsg = warning.map(" Possible reason:\n" + _).getOrElse("") - if (doRewriteTraitCallsite(callsite)) - backendReporting.inlinerWarning(pos, s"$initMsg: the trait method call could not be rewritten to the static implementation method." + warnMsg) - else if (!safeToInline) - backendReporting.inlinerWarning(pos, s"$initMsg: the method is not final and may be overridden." + warnMsg) - else - backendReporting.inlinerWarning(pos, s"$initMsg." + warnMsg) - } else if (warning.isDefined && warning.get.emitWarning(compilerSettings)) { - // when annotatedInline is false, and there is some warning, the callsite metadata is possibly incomplete. - backendReporting.inlinerWarning(pos, s"there was a problem determining if method ${callee.name} can be inlined: \n"+ warning.get) - } - } - - res - - case Callsite(ins, _, _, Left(warning), _, _, _, pos) => - if (warning.emitWarning(compilerSettings)) - backendReporting.inlinerWarning(pos, s"failed to determine if ${ins.name} should be inlined:\n$warning") - false - })).toList - } - - /** - * The current inlining heuristics are simple: inline calls to methods annotated @inline. - */ - def doInlineCallsite(callsite: Callsite): Boolean = callsite match { - case Callsite(_, _, _, Right(Callee(callee, calleeDeclClass, safeToInline, _, annotatedInline, _, warning)), _, _, _, pos) => - if (compilerSettings.YoptInlineHeuristics.value == "everything") safeToInline - else annotatedInline && safeToInline - - case _ => false - } - def rewriteFinalTraitMethodInvocations(): Unit = { // Rewriting final trait method callsites to the implementation class enables inlining. // We cannot just iterate over the values of the `callsites` map because the rewrite changes the diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala index 09bb59cd4d..35f839aa1b 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala @@ -11,6 +11,55 @@ import scala.tools.nsc.backend.jvm.BTypes.InternalName class InlinerHeuristics[BT <: BTypes](val bTypes: BT) { import bTypes._ + import inliner._ + import callGraph._ + + /** + * Select callsites from the call graph that should be inlined. The resulting list of inlining + * requests is allowed to have cycles, and the callsites can appear in any order. + */ + def selectCallsitesForInlining: List[Callsite] = { + callsites.valuesIterator.flatMap(_.valuesIterator.filter({ + case callsite @ Callsite(_, _, _, Right(Callee(callee, calleeDeclClass, safeToInline, _, annotatedInline, _, warning)), _, _, _, pos) => + val res = doInlineCallsite(callsite) + + if (!res) { + if (annotatedInline && bTypes.compilerSettings.YoptWarningEmitAtInlineFailed) { + // if the callsite is annotated @inline, we report an inline warning even if the underlying + // reason is, for example, mixed compilation (which has a separate -Yopt-warning flag). + def initMsg = s"${BackendReporting.methodSignature(calleeDeclClass.internalName, callee)} is annotated @inline but cannot be inlined" + def warnMsg = warning.map(" Possible reason:\n" + _).getOrElse("") + if (doRewriteTraitCallsite(callsite)) + backendReporting.inlinerWarning(pos, s"$initMsg: the trait method call could not be rewritten to the static implementation method." + warnMsg) + else if (!safeToInline) + backendReporting.inlinerWarning(pos, s"$initMsg: the method is not final and may be overridden." + warnMsg) + else + backendReporting.inlinerWarning(pos, s"$initMsg." + warnMsg) + } else if (warning.isDefined && warning.get.emitWarning(compilerSettings)) { + // when annotatedInline is false, and there is some warning, the callsite metadata is possibly incomplete. + backendReporting.inlinerWarning(pos, s"there was a problem determining if method ${callee.name} can be inlined: \n"+ warning.get) + } + } + + res + + case Callsite(ins, _, _, Left(warning), _, _, _, pos) => + if (warning.emitWarning(compilerSettings)) + backendReporting.inlinerWarning(pos, s"failed to determine if ${ins.name} should be inlined:\n$warning") + false + })).toList + } + + /** + * The current inlining heuristics are simple: inline calls to methods annotated @inline. + */ + def doInlineCallsite(callsite: Callsite): Boolean = callsite match { + case Callsite(_, _, _, Right(Callee(callee, calleeDeclClass, safeToInline, _, annotatedInline, _, warning)), _, _, _, pos) => + if (compilerSettings.YoptInlineHeuristics.value == "everything") safeToInline + else annotatedInline && safeToInline + + case _ => false + } /* // using http://lihaoyi.github.io/Ammonite/ -- cgit v1.2.3 From 3e055f1cc19fffe69e4882c6b4f4d5019503a109 Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Mon, 24 Aug 2015 20:55:21 +0200 Subject: Introduce a type for inline requests An inline request contains a callsite and a list of post-inlining requests. A post-inlining request asks to inline an invocation that is copied from the callee into the callsite method while inlining the initial callsite. Post-inlining requests are still ignored for now. --- .../scala/tools/nsc/backend/jvm/opt/Inliner.scala | 59 +++++++++--------- .../nsc/backend/jvm/opt/InlinerHeuristics.scala | 70 ++++++++++++---------- 2 files changed, 71 insertions(+), 58 deletions(-) (limited to 'src') diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala index cc5a4de4e4..cebd5ea1e4 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala @@ -40,7 +40,8 @@ class Inliner[BT <: BTypes](val btypes: BT) { rewriteFinalTraitMethodInvocations() for (request <- collectAndOrderInlineRequests) { - val Right(callee) = request.callee // collectAndOrderInlineRequests returns callsites with a known callee + val callsite = request.callsite + val Right(callee) = callsite.callee // collectAndOrderInlineRequests returns callsites with a known callee // Inlining a method can create unreachable code. Example: // def f = throw e @@ -51,16 +52,18 @@ class Inliner[BT <: BTypes](val btypes: BT) { // DCE above removes unreachable callsites from the call graph. If the inlining request denotes // such an eliminated callsite, do nothing. - if (callGraph.callsites(request.callsiteMethod).contains(request.callsiteInstruction)) { - val r = inline(request.callsiteInstruction, request.callsiteStackHeight, request.callsiteMethod, request.callsiteClass, + if (callGraph.callsites(callsite.callsiteMethod).contains(callsite.callsiteInstruction)) { + val r = inline(callsite.callsiteInstruction, callsite.callsiteStackHeight, callsite.callsiteMethod, callsite.callsiteClass, callee.callee, callee.calleeDeclarationClass, - request.receiverKnownNotNull, keepLineNumbers = false) + callsite.receiverKnownNotNull, keepLineNumbers = false) + + // TODO: run downstream inline requests for (warning <- r) { if ((callee.annotatedInline && btypes.compilerSettings.YoptWarningEmitAtInlineFailed) || warning.emitWarning(compilerSettings)) { val annotWarn = if (callee.annotatedInline) " is annotated @inline but" else "" val msg = s"${BackendReporting.methodSignature(callee.calleeDeclarationClass.internalName, callee.callee)}$annotWarn could not be inlined:\n$warning" - backendReporting.inlinerWarning(request.callsitePosition, msg) + backendReporting.inlinerWarning(callsite.callsitePosition, msg) } } } @@ -72,19 +75,21 @@ class Inliner[BT <: BTypes](val btypes: BT) { * - Always remove the same request when breaking inlining cycles * - Perform inlinings in a consistent order */ - object callsiteOrdering extends Ordering[Callsite] { - override def compare(x: Callsite, y: Callsite): Int = { - val cls = x.callsiteClass.internalName compareTo y.callsiteClass.internalName + object callsiteOrdering extends Ordering[InlineRequest] { + override def compare(x: InlineRequest, y: InlineRequest): Int = { + val xCs = x.callsite + val yCs = y.callsite + val cls = xCs.callsiteClass.internalName compareTo yCs.callsiteClass.internalName if (cls != 0) return cls - val name = x.callsiteMethod.name compareTo y.callsiteMethod.name + val name = xCs.callsiteMethod.name compareTo yCs.callsiteMethod.name if (name != 0) return name - val desc = x.callsiteMethod.desc compareTo y.callsiteMethod.desc + val desc = xCs.callsiteMethod.desc compareTo yCs.callsiteMethod.desc if (desc != 0) return desc def pos(c: Callsite) = c.callsiteMethod.instructions.indexOf(c.callsiteInstruction) - pos(x) - pos(y) + pos(xCs) - pos(yCs) } } @@ -194,15 +199,11 @@ class Inliner[BT <: BTypes](val btypes: BT) { * The resulting list is sorted such that the leaves of the inline request graph are on the left. * Once these leaves are inlined, the successive elements will be leaves, etc. */ - private def collectAndOrderInlineRequests: List[Callsite] = { - val requests = selectCallsitesForInlining + private def collectAndOrderInlineRequests: List[InlineRequest] = { + val requestsByMethod = selectCallsitesForInlining withDefaultValue Set.empty - // This map is an index to look up the inlining requests for a method. The value sets are mutable - // to allow removing elided requests (to break inlining cycles). The map itself is mutable to - // allow efficient building: requests.groupBy would build values as List[Callsite] that need to - // be transformed to mutable sets. - val inlineRequestsForMethod: mutable.Map[MethodNode, mutable.Set[Callsite]] = mutable.HashMap.empty.withDefaultValue(mutable.HashSet.empty) - for (r <- requests) inlineRequestsForMethod.getOrElseUpdate(r.callsiteMethod, mutable.HashSet.empty) += r + val elided = mutable.Set.empty[InlineRequest] + def nonElidedRequests(methodNode: MethodNode): Set[InlineRequest] = requestsByMethod(methodNode) diff elided /** * Break cycles in the inline request graph by removing callsites. @@ -210,7 +211,7 @@ class Inliner[BT <: BTypes](val btypes: BT) { * The list `requests` is traversed left-to-right, removing those callsites that are part of a * cycle. Elided callsites are also removed from the `inlineRequestsForMethod` map. */ - def breakInlineCycles(requests: List[Callsite]): List[Callsite] = { + def breakInlineCycles: List[InlineRequest] = { // is there a path of inline requests from start to goal? def isReachable(start: MethodNode, goal: MethodNode): Boolean = { @tailrec def reachableImpl(check: List[MethodNode], visited: Set[MethodNode]): Boolean = check match { @@ -218,7 +219,7 @@ class Inliner[BT <: BTypes](val btypes: BT) { if (x == goal) true else if (visited(x)) reachableImpl(xs, visited) else { - val callees = inlineRequestsForMethod(x).map(_.callee.get.callee) + val callees = nonElidedRequests(x).map(_.callsite.callee.get.callee) reachableImpl(xs ::: callees.toList, visited + x) } @@ -228,12 +229,14 @@ class Inliner[BT <: BTypes](val btypes: BT) { reachableImpl(List(start), Set.empty) } - val result = new mutable.ListBuffer[Callsite]() + val result = new mutable.ListBuffer[InlineRequest]() + val requests = requestsByMethod.valuesIterator.flatten.toArray // sort the inline requests to ensure that removing requests is deterministic - for (r <- requests.sorted(callsiteOrdering)) { + java.util.Arrays.sort(requests, callsiteOrdering) + for (r <- requests) { // is there a chain of inlining requests that would inline the callsite method into the callee? - if (isReachable(r.callee.get.callee, r.callsiteMethod)) - inlineRequestsForMethod(r.callsiteMethod) -= r + if (isReachable(r.callsite.callee.get.callee, r.callsite.callsiteMethod)) + elided += r else result += r } @@ -242,11 +245,11 @@ class Inliner[BT <: BTypes](val btypes: BT) { // sort the remaining inline requests such that the leaves appear first, then those requests // that become leaves, etc. - def leavesFirst(requests: List[Callsite], visited: Set[Callsite] = Set.empty): List[Callsite] = { + def leavesFirst(requests: List[InlineRequest], visited: Set[InlineRequest] = Set.empty): List[InlineRequest] = { if (requests.isEmpty) Nil else { val (leaves, others) = requests.partition(r => { - val inlineRequestsForCallee = inlineRequestsForMethod(r.callee.get.callee) + val inlineRequestsForCallee = nonElidedRequests(r.callsite.callee.get.callee) inlineRequestsForCallee.forall(visited) }) assert(leaves.nonEmpty, requests) @@ -254,7 +257,7 @@ class Inliner[BT <: BTypes](val btypes: BT) { } } - leavesFirst(breakInlineCycles(requests)) + leavesFirst(breakInlineCycles) } diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala index 35f839aa1b..ba05608918 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala @@ -7,47 +7,57 @@ package scala.tools.nsc package backend.jvm package opt +import scala.tools.asm.tree.{MethodNode, MethodInsnNode} import scala.tools.nsc.backend.jvm.BTypes.InternalName +import scala.collection.convert.decorateAsScala._ class InlinerHeuristics[BT <: BTypes](val bTypes: BT) { import bTypes._ import inliner._ import callGraph._ + case class InlineRequest(callsite: Callsite, post: List[PostInlineRequest]) + case class PostInlineRequest(callsiteInstruction: MethodInsnNode, post: List[PostInlineRequest]) + /** * Select callsites from the call graph that should be inlined. The resulting list of inlining * requests is allowed to have cycles, and the callsites can appear in any order. */ - def selectCallsitesForInlining: List[Callsite] = { - callsites.valuesIterator.flatMap(_.valuesIterator.filter({ - case callsite @ Callsite(_, _, _, Right(Callee(callee, calleeDeclClass, safeToInline, _, annotatedInline, _, warning)), _, _, _, pos) => - val res = doInlineCallsite(callsite) - - if (!res) { - if (annotatedInline && bTypes.compilerSettings.YoptWarningEmitAtInlineFailed) { - // if the callsite is annotated @inline, we report an inline warning even if the underlying - // reason is, for example, mixed compilation (which has a separate -Yopt-warning flag). - def initMsg = s"${BackendReporting.methodSignature(calleeDeclClass.internalName, callee)} is annotated @inline but cannot be inlined" - def warnMsg = warning.map(" Possible reason:\n" + _).getOrElse("") - if (doRewriteTraitCallsite(callsite)) - backendReporting.inlinerWarning(pos, s"$initMsg: the trait method call could not be rewritten to the static implementation method." + warnMsg) - else if (!safeToInline) - backendReporting.inlinerWarning(pos, s"$initMsg: the method is not final and may be overridden." + warnMsg) - else - backendReporting.inlinerWarning(pos, s"$initMsg." + warnMsg) - } else if (warning.isDefined && warning.get.emitWarning(compilerSettings)) { - // when annotatedInline is false, and there is some warning, the callsite metadata is possibly incomplete. - backendReporting.inlinerWarning(pos, s"there was a problem determining if method ${callee.name} can be inlined: \n"+ warning.get) - } - } - - res - - case Callsite(ins, _, _, Left(warning), _, _, _, pos) => - if (warning.emitWarning(compilerSettings)) - backendReporting.inlinerWarning(pos, s"failed to determine if ${ins.name} should be inlined:\n$warning") - false - })).toList + def selectCallsitesForInlining: Map[MethodNode, Set[InlineRequest]] = { + callGraph.callsites.iterator.map({ + case (methodNode, callsites) => + val requests = callsites.valuesIterator.filter({ + case callsite @ Callsite(_, _, _, Right(Callee(callee, calleeDeclClass, safeToInline, _, annotatedInline, _, warning)), _, _, _, pos) => + val res = doInlineCallsite(callsite) + + if (!res) { + if (annotatedInline && bTypes.compilerSettings.YoptWarningEmitAtInlineFailed) { + // if the callsite is annotated @inline, we report an inline warning even if the underlying + // reason is, for example, mixed compilation (which has a separate -Yopt-warning flag). + def initMsg = s"${BackendReporting.methodSignature(calleeDeclClass.internalName, callee)} is annotated @inline but cannot be inlined" + def warnMsg = warning.map(" Possible reason:\n" + _).getOrElse("") + if (doRewriteTraitCallsite(callsite)) + backendReporting.inlinerWarning(pos, s"$initMsg: the trait method call could not be rewritten to the static implementation method." + warnMsg) + else if (!safeToInline) + backendReporting.inlinerWarning(pos, s"$initMsg: the method is not final and may be overridden." + warnMsg) + else + backendReporting.inlinerWarning(pos, s"$initMsg." + warnMsg) + } else if (warning.isDefined && warning.get.emitWarning(compilerSettings)) { + // when annotatedInline is false, and there is some warning, the callsite metadata is possibly incomplete. + backendReporting.inlinerWarning(pos, s"there was a problem determining if method ${callee.name} can be inlined: \n"+ warning.get) + } + } + + res + + case Callsite(ins, _, _, Left(warning), _, _, _, pos) => + if (warning.emitWarning(compilerSettings)) + backendReporting.inlinerWarning(pos, s"failed to determine if ${ins.name} should be inlined:\n$warning") + false + }) + + (methodNode, requests.map(InlineRequest(_, Nil)).toSet) + }).filterNot(_._2.isEmpty).toMap } /** -- cgit v1.2.3 From 246f3eb69707db9ef8185e68408b3ddaba9ac17d Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Mon, 24 Aug 2015 21:25:19 +0200 Subject: Make sure to inline only callsites in classes being compiled Avoid producing inline requests for callsites in classes on the classpath. --- .../nsc/backend/jvm/opt/InlinerHeuristics.scala | 62 ++++++++++++---------- 1 file changed, 33 insertions(+), 29 deletions(-) (limited to 'src') diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala index ba05608918..df2eb813d3 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala @@ -24,39 +24,43 @@ class InlinerHeuristics[BT <: BTypes](val bTypes: BT) { * requests is allowed to have cycles, and the callsites can appear in any order. */ def selectCallsitesForInlining: Map[MethodNode, Set[InlineRequest]] = { - callGraph.callsites.iterator.map({ - case (methodNode, callsites) => - val requests = callsites.valuesIterator.filter({ - case callsite @ Callsite(_, _, _, Right(Callee(callee, calleeDeclClass, safeToInline, _, annotatedInline, _, warning)), _, _, _, pos) => - val res = doInlineCallsite(callsite) - - if (!res) { - if (annotatedInline && bTypes.compilerSettings.YoptWarningEmitAtInlineFailed) { - // if the callsite is annotated @inline, we report an inline warning even if the underlying - // reason is, for example, mixed compilation (which has a separate -Yopt-warning flag). - def initMsg = s"${BackendReporting.methodSignature(calleeDeclClass.internalName, callee)} is annotated @inline but cannot be inlined" - def warnMsg = warning.map(" Possible reason:\n" + _).getOrElse("") - if (doRewriteTraitCallsite(callsite)) - backendReporting.inlinerWarning(pos, s"$initMsg: the trait method call could not be rewritten to the static implementation method." + warnMsg) - else if (!safeToInline) - backendReporting.inlinerWarning(pos, s"$initMsg: the method is not final and may be overridden." + warnMsg) - else - backendReporting.inlinerWarning(pos, s"$initMsg." + warnMsg) - } else if (warning.isDefined && warning.get.emitWarning(compilerSettings)) { - // when annotatedInline is false, and there is some warning, the callsite metadata is possibly incomplete. - backendReporting.inlinerWarning(pos, s"there was a problem determining if method ${callee.name} can be inlined: \n"+ warning.get) - } + // We should only return inlining requests for callsites being compiled (not for callsites in + // classes on the classpath). The call graph may contain callsites of classes parsed from the + // classpath. In order to get only the callsites being compiled, we start at the map of + // compilingClasses in the byteCodeRepository. + val compilingMethods = byteCodeRepository.compilingClasses.valuesIterator.flatMap(_.methods.iterator.asScala) + compilingMethods.map(methodNode => { + val requests = callGraph.callsites(methodNode).valuesIterator.filter({ + case callsite @ Callsite(_, _, _, Right(Callee(callee, calleeDeclClass, safeToInline, _, annotatedInline, _, warning)), _, _, _, pos) => + val res = doInlineCallsite(callsite) + + if (!res) { + if (annotatedInline && bTypes.compilerSettings.YoptWarningEmitAtInlineFailed) { + // if the callsite is annotated @inline, we report an inline warning even if the underlying + // reason is, for example, mixed compilation (which has a separate -Yopt-warning flag). + def initMsg = s"${BackendReporting.methodSignature(calleeDeclClass.internalName, callee)} is annotated @inline but cannot be inlined" + def warnMsg = warning.map(" Possible reason:\n" + _).getOrElse("") + if (doRewriteTraitCallsite(callsite)) + backendReporting.inlinerWarning(pos, s"$initMsg: the trait method call could not be rewritten to the static implementation method." + warnMsg) + else if (!safeToInline) + backendReporting.inlinerWarning(pos, s"$initMsg: the method is not final and may be overridden." + warnMsg) + else + backendReporting.inlinerWarning(pos, s"$initMsg." + warnMsg) + } else if (warning.isDefined && warning.get.emitWarning(compilerSettings)) { + // when annotatedInline is false, and there is some warning, the callsite metadata is possibly incomplete. + backendReporting.inlinerWarning(pos, s"there was a problem determining if method ${callee.name} can be inlined: \n"+ warning.get) } + } - res + res - case Callsite(ins, _, _, Left(warning), _, _, _, pos) => - if (warning.emitWarning(compilerSettings)) - backendReporting.inlinerWarning(pos, s"failed to determine if ${ins.name} should be inlined:\n$warning") - false - }) + case Callsite(ins, _, _, Left(warning), _, _, _, pos) => + if (warning.emitWarning(compilerSettings)) + backendReporting.inlinerWarning(pos, s"failed to determine if ${ins.name} should be inlined:\n$warning") + false + }) - (methodNode, requests.map(InlineRequest(_, Nil)).toSet) + (methodNode, requests.map(InlineRequest(_, Nil)).toSet) }).filterNot(_._2.isEmpty).toMap } -- cgit v1.2.3 From 2ae4d48ada4b1d17a0536d1df9c9d21c8cf5a951 Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Fri, 28 Aug 2015 13:18:00 +0200 Subject: Fix maxStack after inlining The computed value was too large previously: inlining stores receiver and argument values of the inlined method into locals. A too large value doesn't cause any bugs, but data flow analyses would allocate too large frames. The test (InlinerTest.maxLocalsMaxStackAfterInline) didn't catch the bug because it tested the methods of the wrong ClassNode. The CodeGenTools.compileClasses method returns a ClassNode that is created from the classfile byte array emitted by the compiler. This ClassNode is a new object and unrelated to the ClassNode used in the compiler backend. The test now takes the correct ClassNode from the hash map in the backend's byteCodeRepository. --- src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala | 6 ++++-- test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala | 4 ++++ 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala index cebd5ea1e4..13a5060f07 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala @@ -316,8 +316,9 @@ class Inliner[BT <: BTypes](val btypes: BT) { // We just use an asm.Type here, no need to create the MethodBType. val calleAsmType = asm.Type.getMethodType(callee.desc) + val calleeParamTypes = calleAsmType.getArgumentTypes - for(argTp <- calleAsmType.getArgumentTypes) { + for(argTp <- calleeParamTypes) { val opc = argTp.getOpcode(ISTORE) // returns the correct xSTORE instruction for argTp argStores.insert(new VarInsnNode(opc, nextLocalIndex)) // "insert" is "prepend" - the last argument is on the top of the stack nextLocalIndex += argTp.getSize @@ -423,7 +424,8 @@ class Inliner[BT <: BTypes](val btypes: BT) { unreachableCodeEliminated -= callsiteMethod callsiteMethod.maxLocals += returnType.getSize + callee.maxLocals - callsiteMethod.maxStack = math.max(callsiteMethod.maxStack, callee.maxStack + callsiteStackHeight) + val numStoredArgs = calleeParamTypes.length + (if (isStaticMethod(callee)) 0 else 1) + callsiteMethod.maxStack = math.max(callsiteMethod.maxStack, callee.maxStack + callsiteStackHeight - numStoredArgs) None } diff --git a/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala b/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala index c6313a84d2..f085cc917b 100644 --- a/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala +++ b/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala @@ -72,6 +72,10 @@ class InlinerTest extends ClearAfterClass { def compile(scalaCode: String, javaCode: List[(String, String)] = Nil, allowMessage: StoreReporter#Info => Boolean = _ => false): List[ClassNode] = { InlinerTest.notPerRun.foreach(_.clear()) compileClasses(compiler)(scalaCode, javaCode, allowMessage) + // Use the class nodes stored in the byteCodeRepository. The ones returned by compileClasses are not the same, + // these are created new from the classfile byte array. They are completely separate instances which cannot + // be used to look up methods / callsites in the callGraph hash maps for example. + byteCodeRepository.compilingClasses.valuesIterator.toList.sortBy(_.name) } def checkCallsite(callsite: callGraph.Callsite, callee: MethodNode) = { -- cgit v1.2.3 From 294bd25d884f62fcda4dc024c1fdef094b2eae84 Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Fri, 28 Aug 2015 13:18:52 +0200 Subject: Inline post-inlining requests Inline requests have a list of post-inline requests: callsites within the inlined method that should be inlined into the initial callee. This commit changes the inliner to actually perform post-inline requests. --- .../scala/tools/nsc/backend/jvm/opt/Inliner.scala | 308 +++++++++++---------- .../tools/nsc/backend/jvm/opt/InlinerTest.scala | 132 ++++++--- 2 files changed, 258 insertions(+), 182 deletions(-) (limited to 'src') diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala index 13a5060f07..a2eea1af2e 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala @@ -53,13 +53,9 @@ class Inliner[BT <: BTypes](val btypes: BT) { // DCE above removes unreachable callsites from the call graph. If the inlining request denotes // such an eliminated callsite, do nothing. if (callGraph.callsites(callsite.callsiteMethod).contains(callsite.callsiteInstruction)) { - val r = inline(callsite.callsiteInstruction, callsite.callsiteStackHeight, callsite.callsiteMethod, callsite.callsiteClass, - callee.callee, callee.calleeDeclarationClass, - callsite.receiverKnownNotNull, keepLineNumbers = false) + val warnings = inline(request) - // TODO: run downstream inline requests - - for (warning <- r) { + for (warning <- warnings) { if ((callee.annotatedInline && btypes.compilerSettings.YoptWarningEmitAtInlineFailed) || warning.emitWarning(compilerSettings)) { val annotWarn = if (callee.annotatedInline) " is annotated @inline but" else "" val msg = s"${BackendReporting.methodSignature(callee.calleeDeclarationClass.internalName, callee.callee)}$annotWarn could not be inlined:\n$warning" @@ -260,183 +256,199 @@ class Inliner[BT <: BTypes](val btypes: BT) { leavesFirst(breakInlineCycles) } + /** + * Inline the callsites of an inlining request and its post-inlining requests. + * + * @return An inliner warning for each callsite that could not be inlined. + */ + def inline(request: InlineRequest): List[CannotInlineWarning] = canInline(request.callsite) match { + case Some(warning) => List(warning) + case None => + val instructionsMap = inlineCallsite(request.callsite) + val postRequests = request.post.flatMap(post => { + // the post-request invocation instruction might not exist anymore: it might have been + // inlined itself, or eliminated by DCE. + for { + inlinedInvocationInstr <- instructionsMap.get(post.callsiteInstruction).map(_.asInstanceOf[MethodInsnNode]) + inlinedCallsite <- callGraph.callsites(request.callsite.callsiteMethod).get(inlinedInvocationInstr) + } yield InlineRequest(inlinedCallsite, post.post) + }) + postRequests flatMap inline + } /** * Copy and adapt the instructions of a method to a callsite. * * Preconditions: + * - The callsite can safely be inlined (canInline is true) * - The maxLocals and maxStack values of the callsite method are correctly computed - * - The callsite method contains no unreachable basic blocks, i.e., running an [[Analyzer]] - * does not produce any `null` frames + * - The callsite method contains no unreachable basic blocks, i.e., running an Analyzer does + * not produce any `null` frames * - * @param callsiteInstruction The invocation instruction - * @param callsiteStackHeight The stack height at the callsite - * @param callsiteMethod The method in which the invocation occurs - * @param callsiteClass The class in which the callsite method is defined - * @param callee The invoked method - * @param calleeDeclarationClass The class in which the invoked method is defined - * @param receiverKnownNotNull `true` if the receiver is known to be non-null - * @param keepLineNumbers `true` if LineNumberNodes should be copied to the call site - * @return `Some(message)` if inlining cannot be performed, `None` otherwise + * @return A map associating instruction nodes of the callee with the corresponding cloned + * instruction in the callsite method. */ - def inline(callsiteInstruction: MethodInsnNode, callsiteStackHeight: Int, callsiteMethod: MethodNode, callsiteClass: ClassBType, - callee: MethodNode, calleeDeclarationClass: ClassBType, - receiverKnownNotNull: Boolean, keepLineNumbers: Boolean): Option[CannotInlineWarning] = { - canInline(callsiteInstruction, callsiteStackHeight, callsiteMethod, callsiteClass, callee, calleeDeclarationClass) orElse { - // New labels for the cloned instructions - val labelsMap = cloneLabels(callee) - val (clonedInstructions, instructionMap) = cloneInstructions(callee, labelsMap) - if (!keepLineNumbers) { - removeLineNumberNodes(clonedInstructions) - } + def inlineCallsite(callsite: Callsite): Map[AbstractInsnNode, AbstractInsnNode] = { + import callsite.{callsiteClass, callsiteMethod, callsiteInstruction, receiverKnownNotNull, callsiteStackHeight} + val Right(callsiteCallee) = callsite.callee + import callsiteCallee.{callee, calleeDeclarationClass} + + // New labels for the cloned instructions + val labelsMap = cloneLabels(callee) + val (clonedInstructions, instructionMap) = cloneInstructions(callee, labelsMap) + val keepLineNumbers = callsiteClass == calleeDeclarationClass + if (!keepLineNumbers) { + removeLineNumberNodes(clonedInstructions) + } - // local vars in the callee are shifted by the number of locals at the callsite - val localVarShift = callsiteMethod.maxLocals - clonedInstructions.iterator.asScala foreach { - case varInstruction: VarInsnNode => varInstruction.`var` += localVarShift - case iinc: IincInsnNode => iinc.`var` += localVarShift - case _ => () - } + // local vars in the callee are shifted by the number of locals at the callsite + val localVarShift = callsiteMethod.maxLocals + clonedInstructions.iterator.asScala foreach { + case varInstruction: VarInsnNode => varInstruction.`var` += localVarShift + case iinc: IincInsnNode => iinc.`var` += localVarShift + case _ => () + } - // add a STORE instruction for each expected argument, including for THIS instance if any - val argStores = new InsnList - var nextLocalIndex = callsiteMethod.maxLocals - if (!isStaticMethod(callee)) { - if (!receiverKnownNotNull) { - argStores.add(new InsnNode(DUP)) - val nonNullLabel = newLabelNode - argStores.add(new JumpInsnNode(IFNONNULL, nonNullLabel)) - argStores.add(new InsnNode(ACONST_NULL)) - argStores.add(new InsnNode(ATHROW)) - argStores.add(nonNullLabel) - } - argStores.add(new VarInsnNode(ASTORE, nextLocalIndex)) - nextLocalIndex += 1 + // add a STORE instruction for each expected argument, including for THIS instance if any + val argStores = new InsnList + var nextLocalIndex = callsiteMethod.maxLocals + if (!isStaticMethod(callee)) { + if (!receiverKnownNotNull) { + argStores.add(new InsnNode(DUP)) + val nonNullLabel = newLabelNode + argStores.add(new JumpInsnNode(IFNONNULL, nonNullLabel)) + argStores.add(new InsnNode(ACONST_NULL)) + argStores.add(new InsnNode(ATHROW)) + argStores.add(nonNullLabel) } + argStores.add(new VarInsnNode(ASTORE, nextLocalIndex)) + nextLocalIndex += 1 + } - // We just use an asm.Type here, no need to create the MethodBType. - val calleAsmType = asm.Type.getMethodType(callee.desc) - val calleeParamTypes = calleAsmType.getArgumentTypes + // We just use an asm.Type here, no need to create the MethodBType. + val calleAsmType = asm.Type.getMethodType(callee.desc) + val calleeParamTypes = calleAsmType.getArgumentTypes - for(argTp <- calleeParamTypes) { - val opc = argTp.getOpcode(ISTORE) // returns the correct xSTORE instruction for argTp - argStores.insert(new VarInsnNode(opc, nextLocalIndex)) // "insert" is "prepend" - the last argument is on the top of the stack - nextLocalIndex += argTp.getSize - } + for(argTp <- calleeParamTypes) { + val opc = argTp.getOpcode(ISTORE) // returns the correct xSTORE instruction for argTp + argStores.insert(new VarInsnNode(opc, nextLocalIndex)) // "insert" is "prepend" - the last argument is on the top of the stack + nextLocalIndex += argTp.getSize + } - clonedInstructions.insert(argStores) - - // label for the exit of the inlined functions. xRETURNs are rplaced by GOTOs to this label. - val postCallLabel = newLabelNode - clonedInstructions.add(postCallLabel) - - // replace xRETURNs: - // - store the return value (if any) - // - clear the stack of the inlined method (insert DROPs) - // - load the return value - // - GOTO postCallLabel - - val returnType = calleAsmType.getReturnType - val hasReturnValue = returnType.getSort != asm.Type.VOID - val returnValueIndex = callsiteMethod.maxLocals + callee.maxLocals - nextLocalIndex += returnType.getSize - - def returnValueStore(returnInstruction: AbstractInsnNode) = { - val opc = returnInstruction.getOpcode match { - case IRETURN => ISTORE - case LRETURN => LSTORE - case FRETURN => FSTORE - case DRETURN => DSTORE - case ARETURN => ASTORE - } - new VarInsnNode(opc, returnValueIndex) + clonedInstructions.insert(argStores) + + // label for the exit of the inlined functions. xRETURNs are rplaced by GOTOs to this label. + val postCallLabel = newLabelNode + clonedInstructions.add(postCallLabel) + + // replace xRETURNs: + // - store the return value (if any) + // - clear the stack of the inlined method (insert DROPs) + // - load the return value + // - GOTO postCallLabel + + val returnType = calleAsmType.getReturnType + val hasReturnValue = returnType.getSort != asm.Type.VOID + val returnValueIndex = callsiteMethod.maxLocals + callee.maxLocals + nextLocalIndex += returnType.getSize + + def returnValueStore(returnInstruction: AbstractInsnNode) = { + val opc = returnInstruction.getOpcode match { + case IRETURN => ISTORE + case LRETURN => LSTORE + case FRETURN => FSTORE + case DRETURN => DSTORE + case ARETURN => ASTORE } + new VarInsnNode(opc, returnValueIndex) + } - // We run an interpreter to know the stack height at each xRETURN instruction and the sizes - // of the values on the stack. - val analyzer = new AsmAnalyzer(callee, calleeDeclarationClass.internalName) + // We run an interpreter to know the stack height at each xRETURN instruction and the sizes + // of the values on the stack. + val analyzer = new AsmAnalyzer(callee, calleeDeclarationClass.internalName) - for (originalReturn <- callee.instructions.iterator().asScala if isReturn(originalReturn)) { - val frame = analyzer.frameAt(originalReturn) - var stackHeight = frame.getStackSize + for (originalReturn <- callee.instructions.iterator().asScala if isReturn(originalReturn)) { + val frame = analyzer.frameAt(originalReturn) + var stackHeight = frame.getStackSize - val inlinedReturn = instructionMap(originalReturn) - val returnReplacement = new InsnList + val inlinedReturn = instructionMap(originalReturn) + val returnReplacement = new InsnList - def drop(slot: Int) = returnReplacement add getPop(frame.peekStack(slot).getSize) + def drop(slot: Int) = returnReplacement add getPop(frame.peekStack(slot).getSize) - // for non-void methods, store the stack top into the return local variable - if (hasReturnValue) { - returnReplacement add returnValueStore(originalReturn) - stackHeight -= 1 - } + // for non-void methods, store the stack top into the return local variable + if (hasReturnValue) { + returnReplacement add returnValueStore(originalReturn) + stackHeight -= 1 + } - // drop the rest of the stack - for (i <- 0 until stackHeight) drop(i) + // drop the rest of the stack + for (i <- 0 until stackHeight) drop(i) - returnReplacement add new JumpInsnNode(GOTO, postCallLabel) - clonedInstructions.insert(inlinedReturn, returnReplacement) - clonedInstructions.remove(inlinedReturn) - } + returnReplacement add new JumpInsnNode(GOTO, postCallLabel) + clonedInstructions.insert(inlinedReturn, returnReplacement) + clonedInstructions.remove(inlinedReturn) + } - // Load instruction for the return value - if (hasReturnValue) { - val retVarLoad = { - val opc = returnType.getOpcode(ILOAD) - new VarInsnNode(opc, returnValueIndex) - } - clonedInstructions.insert(postCallLabel, retVarLoad) + // Load instruction for the return value + if (hasReturnValue) { + val retVarLoad = { + val opc = returnType.getOpcode(ILOAD) + new VarInsnNode(opc, returnValueIndex) } + clonedInstructions.insert(postCallLabel, retVarLoad) + } - callsiteMethod.instructions.insert(callsiteInstruction, clonedInstructions) - callsiteMethod.instructions.remove(callsiteInstruction) - - callsiteMethod.localVariables.addAll(cloneLocalVariableNodes(callee, labelsMap, callee.name + "_").asJava) - callsiteMethod.tryCatchBlocks.addAll(cloneTryCatchBlockNodes(callee, labelsMap).asJava) - - // Add all invocation instructions and closure instantiations that were inlined to the call graph - callGraph.callsites(callee).valuesIterator foreach { originalCallsite => - val newCallsiteIns = instructionMap(originalCallsite.callsiteInstruction).asInstanceOf[MethodInsnNode] - callGraph.addCallsite(Callsite( - callsiteInstruction = newCallsiteIns, - callsiteMethod = callsiteMethod, - callsiteClass = callsiteClass, - callee = originalCallsite.callee, - argInfos = Nil, // TODO: re-compute argInfos for new destination (once we actually compute them) - callsiteStackHeight = callsiteStackHeight + originalCallsite.callsiteStackHeight, - receiverKnownNotNull = originalCallsite.receiverKnownNotNull, - callsitePosition = originalCallsite.callsitePosition - ) - ) - } + callsiteMethod.instructions.insert(callsiteInstruction, clonedInstructions) + callsiteMethod.instructions.remove(callsiteInstruction) + + callsiteMethod.localVariables.addAll(cloneLocalVariableNodes(callee, labelsMap, callee.name + "_").asJava) + callsiteMethod.tryCatchBlocks.addAll(cloneTryCatchBlockNodes(callee, labelsMap).asJava) + + // Add all invocation instructions and closure instantiations that were inlined to the call graph + callGraph.callsites(callee).valuesIterator foreach { originalCallsite => + val newCallsiteIns = instructionMap(originalCallsite.callsiteInstruction).asInstanceOf[MethodInsnNode] + callGraph.addCallsite(Callsite( + callsiteInstruction = newCallsiteIns, + callsiteMethod = callsiteMethod, + callsiteClass = callsiteClass, + callee = originalCallsite.callee, + argInfos = Nil, // TODO: re-compute argInfos for new destination (once we actually compute them) + callsiteStackHeight = callsiteStackHeight + originalCallsite.callsiteStackHeight, + receiverKnownNotNull = originalCallsite.receiverKnownNotNull, + callsitePosition = originalCallsite.callsitePosition + ) + ) + } - callGraph.closureInstantiations(callee).valuesIterator foreach { originalClosureInit => - val newIndy = instructionMap(originalClosureInit.lambdaMetaFactoryCall.indy).asInstanceOf[InvokeDynamicInsnNode] - callGraph.addClosureInstantiation( - ClosureInstantiation(originalClosureInit.lambdaMetaFactoryCall.copy(indy = newIndy), callsiteMethod, callsiteClass) - ) - } + callGraph.closureInstantiations(callee).valuesIterator foreach { originalClosureInit => + val newIndy = instructionMap(originalClosureInit.lambdaMetaFactoryCall.indy).asInstanceOf[InvokeDynamicInsnNode] + callGraph.addClosureInstantiation( + ClosureInstantiation(originalClosureInit.lambdaMetaFactoryCall.copy(indy = newIndy), callsiteMethod, callsiteClass) + ) + } - // Remove the elided invocation from the call graph - callGraph.removeCallsite(callsiteInstruction, callsiteMethod) + // Remove the elided invocation from the call graph + callGraph.removeCallsite(callsiteInstruction, callsiteMethod) - // Inlining a method body can render some code unreachable, see example above (in runInliner). - unreachableCodeEliminated -= callsiteMethod + // Inlining a method body can render some code unreachable, see example above (in runInliner). + unreachableCodeEliminated -= callsiteMethod - callsiteMethod.maxLocals += returnType.getSize + callee.maxLocals - val numStoredArgs = calleeParamTypes.length + (if (isStaticMethod(callee)) 0 else 1) - callsiteMethod.maxStack = math.max(callsiteMethod.maxStack, callee.maxStack + callsiteStackHeight - numStoredArgs) + callsiteMethod.maxLocals += returnType.getSize + callee.maxLocals + val numStoredArgs = calleeParamTypes.length + (if (isStaticMethod(callee)) 0 else 1) + callsiteMethod.maxStack = math.max(callsiteMethod.maxStack, callee.maxStack + callsiteStackHeight - numStoredArgs) - None - } + instructionMap } /** - * Check whether an inling can be performed. Parmeters are described in method [[inline]]. + * Check whether an inling can be performed. * @return `Some(message)` if inlining cannot be performed, `None` otherwise */ - def canInline(callsiteInstruction: MethodInsnNode, callsiteStackHeight: Int, callsiteMethod: MethodNode, callsiteClass: ClassBType, - callee: MethodNode, calleeDeclarationClass: ClassBType): Option[CannotInlineWarning] = { + def canInline(callsite: Callsite): Option[CannotInlineWarning] = { + import callsite.{callsiteInstruction, callsiteMethod, callsiteClass, callsiteStackHeight} + val Right(callsiteCallee) = callsite.callee + import callsiteCallee.{callee, calleeDeclarationClass} def calleeDesc = s"${callee.name} of type ${callee.desc} in ${calleeDeclarationClass.internalName}" def methodMismatch = s"Wrong method node for inlining ${textify(callsiteInstruction)}: $calleeDesc" diff --git a/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala b/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala index f085cc917b..d727951a6b 100644 --- a/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala +++ b/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala @@ -7,7 +7,7 @@ import org.junit.runners.JUnit4 import org.junit.Test import scala.collection.generic.Clearable import scala.collection.mutable.ListBuffer -import scala.reflect.internal.util.BatchSourceFile +import scala.reflect.internal.util.{NoPosition, BatchSourceFile} import scala.tools.asm.Opcodes._ import org.junit.Assert._ @@ -87,8 +87,23 @@ class InlinerTest extends ClearAfterClass { assert(callsite.callee.get.callee == callee, callsite.callee.get.callee.name) } + def makeInlineRequest( callsiteInstruction: MethodInsnNode, callsiteMethod: MethodNode, callsiteClass: ClassBType, + callee: MethodNode, calleeDeclarationClass: ClassBType, + callsiteStackHeight: Int, receiverKnownNotNull: Boolean, + post: List[inliner.heuristics.PostInlineRequest] = Nil) = inliner.heuristics.InlineRequest( + callsite = callGraph.Callsite( + callsiteInstruction = callsiteInstruction, + callsiteMethod = callsiteMethod, + callsiteClass = callsiteClass, + callee = Right(callGraph.Callee(callee = callee, calleeDeclarationClass = calleeDeclarationClass, safeToInline = true, safeToRewrite = false, annotatedInline = false, annotatedNoInline = false, calleeInfoWarning = None)), + argInfos = Nil, + callsiteStackHeight = callsiteStackHeight, + receiverKnownNotNull = receiverKnownNotNull, + callsitePosition = NoPosition), + post = post) + // inline first invocation of f into g in class C - def inlineTest(code: String, mod: ClassNode => Unit = _ => ()): (MethodNode, Option[CannotInlineWarning]) = { + def inlineTest(code: String, mod: ClassNode => Unit = _ => ()): (MethodNode, List[CannotInlineWarning]) = { val List(cls) = compile(code) mod(cls) val clsBType = classBTypeFromParsedClassfile(cls.name) @@ -98,15 +113,17 @@ class InlinerTest extends ClearAfterClass { val analyzer = new AsmAnalyzer(g, clsBType.internalName) - val r = inliner.inline( - fCall, - analyzer.frameAt(fCall).getStackSize, - g, - clsBType, - f, - clsBType, - receiverKnownNotNull = true, - keepLineNumbers = true) + val request = makeInlineRequest( + callsiteInstruction = fCall, + callsiteMethod = g, + callsiteClass = clsBType, + callee = f, + calleeDeclarationClass = clsBType, + callsiteStackHeight = analyzer.frameAt(fCall).getStackSize, + receiverKnownNotNull = true + ) + + val r = inliner.inline(request) (g, r) } @@ -178,7 +195,7 @@ class InlinerTest extends ClearAfterClass { val f = cls.methods.asScala.find(_.name == "f").get f.access |= ACC_SYNCHRONIZED }) - assert(can.get.isInstanceOf[SynchronizedMethod], can) + assert(can.length == 1 && can.head.isInstanceOf[SynchronizedMethod], can) } @Test @@ -204,7 +221,7 @@ class InlinerTest extends ClearAfterClass { |} """.stripMargin val (_, r) = inlineTest(code) - assert(r.get.isInstanceOf[MethodWithHandlerCalledOnNonEmptyStack], r) + assert(r.length == 1 && r.head.isInstanceOf[MethodWithHandlerCalledOnNonEmptyStack], r) } @Test @@ -236,17 +253,19 @@ class InlinerTest extends ClearAfterClass { val analyzer = new AsmAnalyzer(h, dTp.internalName) - val r = inliner.inline( - gCall, - analyzer.frameAt(gCall).getStackSize, - h, - dTp, - g, - cTp, - receiverKnownNotNull = true, - keepLineNumbers = true) - - assert(r.get.isInstanceOf[IllegalAccessInstruction], r) + val request = makeInlineRequest( + callsiteInstruction = gCall, + callsiteMethod = h, + callsiteClass = dTp, + callee = g, + calleeDeclarationClass = cTp, + callsiteStackHeight = analyzer.frameAt(gCall).getStackSize, + receiverKnownNotNull = true + ) + + val r = inliner.inline(request) + + assert(r.length == 1 && r.head.isInstanceOf[IllegalAccessInstruction], r) } @Test @@ -391,16 +410,17 @@ class InlinerTest extends ClearAfterClass { val integerClassBType = classBTypeFromInternalName("java/lang/Integer") val lowestOneBitMethod = byteCodeRepository.methodNode(integerClassBType.internalName, "lowestOneBit", "(I)I").get._1 - val r = inliner.inline( - callsiteIns, - analyzer.frameAt(callsiteIns).getStackSize, - f, - clsBType, - lowestOneBitMethod, - integerClassBType, - receiverKnownNotNull = false, - keepLineNumbers = false) - + val request = makeInlineRequest( + callsiteInstruction = callsiteIns, + callsiteMethod = f, + callsiteClass = clsBType, + callee = lowestOneBitMethod, + calleeDeclarationClass = integerClassBType, + callsiteStackHeight = analyzer.frameAt(callsiteIns).getStackSize, + receiverKnownNotNull = false + ) + + val r = inliner.inline(request) assert(r.isEmpty, r) val ins = instructionsFromMethod(f) @@ -1036,4 +1056,48 @@ class InlinerTest extends ClearAfterClass { }) assertInvoke(t2, "M$", "M$$$anonfun$1") } + + @Test + def inlinePostRequests(): Unit = { + val code = + """class C { + | final def f = 10 + | final def g = f + 19 + | final def h = g + 29 + |} + """.stripMargin + + val List(c) = compile(code) + + val cTp = classBTypeFromParsedClassfile(c.name) + + val f = c.methods.asScala.find(_.name == "f").get + val g = c.methods.asScala.find(_.name == "g").get + val h = c.methods.asScala.find(_.name == "h").get + + val gCall = h.instructions.iterator.asScala.collect({ + case m: MethodInsnNode if m.name == "g" => m + }).next() + val fCall = g.instructions.iterator.asScala.collect({ + case m: MethodInsnNode if m.name == "f" => m + }).next() + + val analyzer = new AsmAnalyzer(h, cTp.internalName) + + val request = makeInlineRequest( + callsiteInstruction = gCall, + callsiteMethod = h, + callsiteClass = cTp, + callee = g, + calleeDeclarationClass = cTp, + callsiteStackHeight = analyzer.frameAt(gCall).getStackSize, + receiverKnownNotNull = false, + post = List(inliner.heuristics.PostInlineRequest(fCall, Nil)) + ) + + val r = inliner.inline(request) + assertNoInvoke(getSingleMethod(c, "h")) // no invoke in h: first g is inlined, then the inlined call to f is also inlined + assertInvoke(getSingleMethod(c, "g"), "C", "f") // g itself still has the call to f + assert(r.isEmpty, r) + } } -- cgit v1.2.3 From 9fc684b3cacdc008507a1e32cdab4408a080e60d Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Tue, 25 Aug 2015 11:42:46 +0200 Subject: Ensure the call graph contains all inlined callsites After inlining, inlined callsites are added to the call graph. For efficiency, the callsites of the callee are copied and adapted (the alternative would be to re-add the entire method to the call graph, which involves running data flow analyses). If the callee is in a class parsed from bytecode (vs a class being compiled), it might not yet be in the call graph. This commit ensures that the method is added to the call graph before looking for its callsites. --- src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala | 4 ++++ src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala | 2 ++ 2 files changed, 6 insertions(+) (limited to 'src') diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala index e11ae08a8c..c1293d4e81 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala @@ -53,6 +53,10 @@ class CallGraph[BT <: BTypes](val btypes: BT) { classNode.methods.asScala.foreach(addMethod(_, classType)) } + def addIfMissing(methodNode: MethodNode, definingClass: ClassBType): Unit = { + if (!callsites.contains(methodNode)) addMethod(methodNode, definingClass) + } + /** * Returns a list of callsites in the method, plus a list of closure instantiation indy instructions. */ diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala index a2eea1af2e..f4673be974 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala @@ -405,6 +405,8 @@ class Inliner[BT <: BTypes](val btypes: BT) { callsiteMethod.localVariables.addAll(cloneLocalVariableNodes(callee, labelsMap, callee.name + "_").asJava) callsiteMethod.tryCatchBlocks.addAll(cloneTryCatchBlockNodes(callee, labelsMap).asJava) + callGraph.addIfMissing(callee, calleeDeclarationClass) + // Add all invocation instructions and closure instantiations that were inlined to the call graph callGraph.callsites(callee).valuesIterator foreach { originalCallsite => val newCallsiteIns = instructionMap(originalCallsite.callsiteInstruction).asInstanceOf[MethodInsnNode] -- cgit v1.2.3 From da8f263208f8934650d3900793a4115ff1751310 Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Wed, 26 Aug 2015 11:30:10 +0200 Subject: Include information about higher-order methods in the call graph For higher order methods, the call graph contains a map from parameter positions to SAM types. --- .../tools/nsc/backend/jvm/opt/CallGraph.scala | 185 +++++++++++---------- .../nsc/backend/jvm/opt/ClosureOptimizer.scala | 19 ++- .../scala/tools/nsc/backend/jvm/opt/Inliner.scala | 9 +- .../nsc/backend/jvm/opt/InlinerHeuristics.scala | 73 +++++--- .../tools/nsc/backend/jvm/opt/InlinerTest.scala | 3 +- 5 files changed, 169 insertions(+), 120 deletions(-) (limited to 'src') diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala index c1293d4e81..fcb8991baa 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala @@ -7,6 +7,7 @@ package scala.tools.nsc package backend.jvm package opt +import scala.collection.immutable.IntMap import scala.reflect.internal.util.{NoPosition, Position} import scala.tools.asm.tree.analysis.{Value, Analyzer, BasicInterpreter} import scala.tools.asm.{Opcodes, Type, Handle} @@ -48,6 +49,32 @@ class CallGraph[BT <: BTypes](val btypes: BT) { */ val closureInstantiations: mutable.Map[MethodNode, Map[InvokeDynamicInsnNode, ClosureInstantiation]] = recordPerRunCache(concurrent.TrieMap.empty withDefaultValue Map.empty) + def removeCallsite(invocation: MethodInsnNode, methodNode: MethodNode): Option[Callsite] = { + val methodCallsites = callsites(methodNode) + val newCallsites = methodCallsites - invocation + if (newCallsites.isEmpty) callsites.remove(methodNode) + else callsites(methodNode) = newCallsites + methodCallsites.get(invocation) + } + + def addCallsite(callsite: Callsite): Unit = { + val methodCallsites = callsites(callsite.callsiteMethod) + callsites(callsite.callsiteMethod) = methodCallsites + (callsite.callsiteInstruction -> callsite) + } + + def removeClosureInstantiation(indy: InvokeDynamicInsnNode, methodNode: MethodNode): Option[ClosureInstantiation] = { + val methodClosureInits = closureInstantiations(methodNode) + val newClosureInits = methodClosureInits - indy + if (newClosureInits.isEmpty) closureInstantiations.remove(methodNode) + else closureInstantiations(methodNode) = newClosureInits + methodClosureInits.get(indy) + } + + def addClosureInstantiation(closureInit: ClosureInstantiation) = { + val methodClosureInits = closureInstantiations(closureInit.ownerMethod) + closureInstantiations(closureInit.ownerMethod) = methodClosureInits + (closureInit.lambdaMetaFactoryCall.indy -> closureInit) + } + def addClass(classNode: ClassNode): Unit = { val classType = classBTypeFromClassNode(classNode) classNode.methods.asScala.foreach(addMethod(_, classType)) @@ -61,70 +88,6 @@ class CallGraph[BT <: BTypes](val btypes: BT) { * Returns a list of callsites in the method, plus a list of closure instantiation indy instructions. */ def addMethod(methodNode: MethodNode, definingClass: ClassBType): Unit = { - - case class CallsiteInfo(safeToInline: Boolean, safeToRewrite: Boolean, - annotatedInline: Boolean, annotatedNoInline: Boolean, - warning: Option[CalleeInfoWarning]) - - /** - * Analyze a callsite and gather meta-data that can be used for inlining decisions. - */ - def analyzeCallsite(calleeMethodNode: MethodNode, calleeDeclarationClassBType: ClassBType, receiverTypeInternalName: InternalName, calleeSource: Source): CallsiteInfo = { - val methodSignature = calleeMethodNode.name + calleeMethodNode.desc - - try { - // The inlineInfo.methodInfos of a ClassBType holds an InlineInfo for each method *declared* - // within a class (not for inherited methods). Since we already have the classBType of the - // callee, we only check there for the methodInlineInfo, we should find it there. - calleeDeclarationClassBType.info.orThrow.inlineInfo.methodInfos.get(methodSignature) match { - case Some(methodInlineInfo) => - val canInlineFromSource = compilerSettings.YoptInlineGlobal || calleeSource == CompilationUnit - - val isAbstract = BytecodeUtils.isAbstractMethod(calleeMethodNode) - - // (1) A non-final method can be safe to inline if the receiver type is a final subclass. Example: - // class A { @inline def f = 1 }; object B extends A; B.f // can be inlined - // - // TODO: type analysis can render more calls statically resolved. Example: - // new A.f // can be inlined, the receiver type is known to be exactly A. - val isStaticallyResolved: Boolean = { - methodInlineInfo.effectivelyFinal || - classBTypeFromParsedClassfile(receiverTypeInternalName).info.orThrow.inlineInfo.isEffectivelyFinal // (1) - } - - val isRewritableTraitCall = isStaticallyResolved && methodInlineInfo.traitMethodWithStaticImplementation - - val warning = calleeDeclarationClassBType.info.orThrow.inlineInfo.warning.map( - MethodInlineInfoIncomplete(calleeDeclarationClassBType.internalName, calleeMethodNode.name, calleeMethodNode.desc, _)) - - // (1) For invocations of final trait methods, the callee isStaticallyResolved but also - // abstract. Such a callee is not safe to inline - it needs to be re-written to the - // static impl method first (safeToRewrite). - // (2) Final trait methods can be rewritten from the interface to the static implementation - // method to enable inlining. - CallsiteInfo( - safeToInline = - canInlineFromSource && - isStaticallyResolved && // (1) - !isAbstract && - !BytecodeUtils.isConstructor(calleeMethodNode) && - !BytecodeUtils.isNativeMethod(calleeMethodNode), - safeToRewrite = canInlineFromSource && isRewritableTraitCall, // (2) - annotatedInline = methodInlineInfo.annotatedInline, - annotatedNoInline = methodInlineInfo.annotatedNoInline, - warning = warning) - - case None => - val warning = MethodInlineInfoMissing(calleeDeclarationClassBType.internalName, calleeMethodNode.name, calleeMethodNode.desc, calleeDeclarationClassBType.info.orThrow.inlineInfo.warning) - CallsiteInfo(false, false, false, false, Some(warning)) - } - } catch { - case Invalid(noInfo: NoClassBTypeInfo) => - val warning = MethodInlineInfoError(calleeDeclarationClassBType.internalName, calleeMethodNode.name, calleeMethodNode.desc, noInfo) - CallsiteInfo(false, false, false, false, Some(warning)) - } - } - // TODO: run dataflow analyses to make the call graph more precise // - producers to get forwarded parameters (ForwardedParam) // - typeAnalysis for more precise argument types, more precise callee @@ -158,7 +121,7 @@ class CallGraph[BT <: BTypes](val btypes: BT) { (declarationClassNode, source) <- byteCodeRepository.classNodeAndSource(declarationClass): Either[OptimizerWarning, (ClassNode, Source)] declarationClassBType = classBTypeFromClassNode(declarationClassNode) } yield { - val CallsiteInfo(safeToInline, safeToRewrite, annotatedInline, annotatedNoInline, warning) = analyzeCallsite(method, declarationClassBType, call.owner, source) + val CallsiteInfo(safeToInline, safeToRewrite, annotatedInline, annotatedNoInline, higherOrderParams, warning) = analyzeCallsite(method, declarationClassBType, call.owner, source) Callee( callee = method, calleeDeclarationClass = declarationClassBType, @@ -166,6 +129,7 @@ class CallGraph[BT <: BTypes](val btypes: BT) { safeToRewrite = safeToRewrite, annotatedInline = annotatedInline, annotatedNoInline = annotatedNoInline, + higherOrderParams = higherOrderParams, calleeInfoWarning = warning) } @@ -206,30 +170,73 @@ class CallGraph[BT <: BTypes](val btypes: BT) { closureInstantiations(methodNode) = methodClosureInstantiations } - def removeCallsite(invocation: MethodInsnNode, methodNode: MethodNode): Option[Callsite] = { - val methodCallsites = callsites(methodNode) - val newCallsites = methodCallsites - invocation - if (newCallsites.isEmpty) callsites.remove(methodNode) - else callsites(methodNode) = newCallsites - methodCallsites.get(invocation) - } - - def addCallsite(callsite: Callsite): Unit = { - val methodCallsites = callsites(callsite.callsiteMethod) - callsites(callsite.callsiteMethod) = methodCallsites + (callsite.callsiteInstruction -> callsite) - } - - def removeClosureInstantiation(indy: InvokeDynamicInsnNode, methodNode: MethodNode): Option[ClosureInstantiation] = { - val methodClosureInits = closureInstantiations(methodNode) - val newClosureInits = methodClosureInits - indy - if (newClosureInits.isEmpty) closureInstantiations.remove(methodNode) - else closureInstantiations(methodNode) = newClosureInits - methodClosureInits.get(indy) - } + /** + * Just a named tuple used as return type of `analyzeCallsite`. + */ + private case class CallsiteInfo(safeToInline: Boolean, safeToRewrite: Boolean, + annotatedInline: Boolean, annotatedNoInline: Boolean, + higherOrderParams: IntMap[ClassBType], + warning: Option[CalleeInfoWarning]) - def addClosureInstantiation(closureInit: ClosureInstantiation) = { - val methodClosureInits = closureInstantiations(closureInit.ownerMethod) - closureInstantiations(closureInit.ownerMethod) = methodClosureInits + (closureInit.lambdaMetaFactoryCall.indy -> closureInit) + /** + * Analyze a callsite and gather meta-data that can be used for inlining decisions. + */ + private def analyzeCallsite(calleeMethodNode: MethodNode, calleeDeclarationClassBType: ClassBType, receiverTypeInternalName: InternalName, calleeSource: Source): CallsiteInfo = { + val methodSignature = calleeMethodNode.name + calleeMethodNode.desc + + try { + // The inlineInfo.methodInfos of a ClassBType holds an InlineInfo for each method *declared* + // within a class (not for inherited methods). Since we already have the classBType of the + // callee, we only check there for the methodInlineInfo, we should find it there. + calleeDeclarationClassBType.info.orThrow.inlineInfo.methodInfos.get(methodSignature) match { + case Some(methodInlineInfo) => + val canInlineFromSource = compilerSettings.YoptInlineGlobal || calleeSource == CompilationUnit + + val isAbstract = BytecodeUtils.isAbstractMethod(calleeMethodNode) + + val receiverType = classBTypeFromParsedClassfile(receiverTypeInternalName) + // (1) A non-final method can be safe to inline if the receiver type is a final subclass. Example: + // class A { @inline def f = 1 }; object B extends A; B.f // can be inlined + // + // TODO: type analysis can render more calls statically resolved. Example: + // new A.f // can be inlined, the receiver type is known to be exactly A. + val isStaticallyResolved: Boolean = { + methodInlineInfo.effectivelyFinal || + receiverType.info.orThrow.inlineInfo.isEffectivelyFinal // (1) + } + + val isRewritableTraitCall = isStaticallyResolved && methodInlineInfo.traitMethodWithStaticImplementation + + val warning = calleeDeclarationClassBType.info.orThrow.inlineInfo.warning.map( + MethodInlineInfoIncomplete(calleeDeclarationClassBType.internalName, calleeMethodNode.name, calleeMethodNode.desc, _)) + + // (1) For invocations of final trait methods, the callee isStaticallyResolved but also + // abstract. Such a callee is not safe to inline - it needs to be re-written to the + // static impl method first (safeToRewrite). + // (2) Final trait methods can be rewritten from the interface to the static implementation + // method to enable inlining. + CallsiteInfo( + safeToInline = + canInlineFromSource && + isStaticallyResolved && // (1) + !isAbstract && + !BytecodeUtils.isConstructor(calleeMethodNode) && + !BytecodeUtils.isNativeMethod(calleeMethodNode), + safeToRewrite = canInlineFromSource && isRewritableTraitCall, // (2) + annotatedInline = methodInlineInfo.annotatedInline, + annotatedNoInline = methodInlineInfo.annotatedNoInline, + higherOrderParams = inliner.heuristics.higherOrderParams(calleeMethodNode, receiverType), + warning = warning) + + case None => + val warning = MethodInlineInfoMissing(calleeDeclarationClassBType.internalName, calleeMethodNode.name, calleeMethodNode.desc, calleeDeclarationClassBType.info.orThrow.inlineInfo.warning) + CallsiteInfo(false, false, false, false, IntMap.empty, Some(warning)) + } + } catch { + case Invalid(noInfo: NoClassBTypeInfo) => + val warning = MethodInlineInfoError(calleeDeclarationClassBType.internalName, calleeMethodNode.name, calleeMethodNode.desc, noInfo) + CallsiteInfo(false, false, false, false, IntMap.empty, Some(warning)) + } } /** @@ -277,12 +284,14 @@ class CallGraph[BT <: BTypes](val btypes: BT) { * that can be safely re-written to the static implementation method. * @param annotatedInline True if the callee is annotated @inline * @param annotatedNoInline True if the callee is annotated @noinline + * @param higherOrderParams A map from parameter positions to SAM parameter types * @param calleeInfoWarning An inliner warning if some information was not available while * gathering the information about this callee. */ final case class Callee(callee: MethodNode, calleeDeclarationClass: ClassBType, safeToInline: Boolean, safeToRewrite: Boolean, annotatedInline: Boolean, annotatedNoInline: Boolean, + higherOrderParams: IntMap[ClassBType], calleeInfoWarning: Option[CalleeInfoWarning]) { assert(!(safeToInline && safeToRewrite), s"A callee of ${callee.name} can be either safeToInline or safeToRewrite, but not both.") } diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala index 3fbc374a93..ba6897b06e 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala @@ -240,14 +240,17 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { callsiteMethod = ownerMethod, callsiteClass = closureInit.ownerClass, callee = bodyMethod.map({ - case (bodyMethodNode, bodyMethodDeclClass) => Callee( - callee = bodyMethodNode, - calleeDeclarationClass = classBTypeFromParsedClassfile(bodyMethodDeclClass), - safeToInline = compilerSettings.YoptInlineGlobal || bodyMethodIsBeingCompiled, - safeToRewrite = false, // the lambda body method is not a trait interface method - annotatedInline = false, - annotatedNoInline = false, - calleeInfoWarning = None) + case (bodyMethodNode, bodyMethodDeclClass) => + val bodyDeclClassType = classBTypeFromParsedClassfile(bodyMethodDeclClass) + Callee( + callee = bodyMethodNode, + calleeDeclarationClass = bodyDeclClassType, + safeToInline = compilerSettings.YoptInlineGlobal || bodyMethodIsBeingCompiled, + safeToRewrite = false, // the lambda body method is not a trait interface method + annotatedInline = false, + annotatedNoInline = false, + inliner.heuristics.higherOrderParams(bodyMethodNode, bodyDeclClassType), + calleeInfoWarning = None) }), argInfos = Nil, callsiteStackHeight = invocationStackHeight, diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala index f4673be974..265685ad84 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala @@ -100,7 +100,7 @@ class Inliner[BT <: BTypes](val btypes: BT) { * True for statically resolved trait callsites that should be rewritten to the static implementation method. */ def doRewriteTraitCallsite(callsite: Callsite) = callsite.callee match { - case Right(Callee(callee, calleeDeclarationClass, safeToInline, true, annotatedInline, annotatedNoInline, infoWarning)) => true + case Right(Callee(_, _, _, safeToRewrite, _, _, _, _)) => safeToRewrite case _ => false } @@ -113,7 +113,7 @@ class Inliner[BT <: BTypes](val btypes: BT) { */ def rewriteFinalTraitMethodInvocation(callsite: Callsite): Unit = { if (doRewriteTraitCallsite(callsite)) { - val Right(Callee(callee, calleeDeclarationClass, _, _, annotatedInline, annotatedNoInline, infoWarning)) = callsite.callee + val Right(Callee(callee, calleeDeclarationClass, _, _, annotatedInline, annotatedNoInline, higherOrderParams, infoWarning)) = callsite.callee val traitMethodArgumentTypes = asm.Type.getArgumentTypes(callee.desc) @@ -160,6 +160,10 @@ class Inliner[BT <: BTypes](val btypes: BT) { callsite.callsiteMethod.instructions.remove(callsite.callsiteInstruction) callGraph.removeCallsite(callsite.callsiteInstruction, callsite.callsiteMethod) + val staticCallHigherOrderParams = { + if (selfParamType.info.get.inlineInfo.sam.isEmpty) higherOrderParams - 0 + else higherOrderParams.updated(0, selfParamType) + } val staticCallsite = Callsite( callsiteInstruction = newCallsiteInstruction, callsiteMethod = callsite.callsiteMethod, @@ -171,6 +175,7 @@ class Inliner[BT <: BTypes](val btypes: BT) { safeToRewrite = false, annotatedInline = annotatedInline, annotatedNoInline = annotatedNoInline, + higherOrderParams = staticCallHigherOrderParams, calleeInfoWarning = infoWarning)), argInfos = Nil, callsiteStackHeight = callsite.callsiteStackHeight, diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala index df2eb813d3..7cfc797317 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala @@ -7,6 +7,8 @@ package scala.tools.nsc package backend.jvm package opt +import scala.collection.immutable.IntMap +import scala.tools.asm.Type import scala.tools.asm.tree.{MethodNode, MethodInsnNode} import scala.tools.nsc.backend.jvm.BTypes.InternalName import scala.collection.convert.decorateAsScala._ @@ -20,21 +22,26 @@ class InlinerHeuristics[BT <: BTypes](val bTypes: BT) { case class PostInlineRequest(callsiteInstruction: MethodInsnNode, post: List[PostInlineRequest]) /** - * Select callsites from the call graph that should be inlined. The resulting list of inlining - * requests is allowed to have cycles, and the callsites can appear in any order. + * Select callsites from the call graph that should be inlined, grouped by the containing method. + * Cyclic inlining requests are allowed, the inliner will eliminate requests to break cycles. */ def selectCallsitesForInlining: Map[MethodNode, Set[InlineRequest]] = { - // We should only return inlining requests for callsites being compiled (not for callsites in + // We should only create inlining requests for callsites being compiled (not for callsites in // classes on the classpath). The call graph may contain callsites of classes parsed from the // classpath. In order to get only the callsites being compiled, we start at the map of // compilingClasses in the byteCodeRepository. - val compilingMethods = byteCodeRepository.compilingClasses.valuesIterator.flatMap(_.methods.iterator.asScala) - compilingMethods.map(methodNode => { - val requests = callGraph.callsites(methodNode).valuesIterator.filter({ - case callsite @ Callsite(_, _, _, Right(Callee(callee, calleeDeclClass, safeToInline, _, annotatedInline, _, warning)), _, _, _, pos) => - val res = doInlineCallsite(callsite) + val compilingMethods = for { + classNode <- byteCodeRepository.compilingClasses.valuesIterator + methodNode <- classNode.methods.iterator.asScala + } yield methodNode - if (!res) { + compilingMethods.map(methodNode => { + var requests = Set.empty[InlineRequest] + callGraph.callsites(methodNode).valuesIterator foreach { + case callsite @ Callsite(_, _, _, Right(Callee(callee, calleeDeclClass, safeToInline, _, annotatedInline, _, _, warning)), _, _, _, pos) => + val request = inlineRequest(callsite) + requests ++= request + if (request.isEmpty) { if (annotatedInline && bTypes.compilerSettings.YoptWarningEmitAtInlineFailed) { // if the callsite is annotated @inline, we report an inline warning even if the underlying // reason is, for example, mixed compilation (which has a separate -Yopt-warning flag). @@ -52,27 +59,51 @@ class InlinerHeuristics[BT <: BTypes](val bTypes: BT) { } } - res - case Callsite(ins, _, _, Left(warning), _, _, _, pos) => if (warning.emitWarning(compilerSettings)) backendReporting.inlinerWarning(pos, s"failed to determine if ${ins.name} should be inlined:\n$warning") - false - }) - - (methodNode, requests.map(InlineRequest(_, Nil)).toSet) + } + (methodNode, requests) }).filterNot(_._2.isEmpty).toMap } + def higherOrderParams(methodNode: MethodNode, receiverType: ClassBType): IntMap[ClassBType] = { + var res = IntMap.empty[ClassBType] + val paramTypes = { + val params = Type.getMethodType(methodNode.desc).getArgumentTypes.map(t => bTypeForDescriptorOrInternalNameFromClassfile(t.getDescriptor)) + val isStatic = BytecodeUtils.isStaticMethod(methodNode) + if (isStatic) params else receiverType +: params + } + for (i <- paramTypes.indices) { + paramTypes(i) match { + case c: ClassBType => + if (c.info.get.inlineInfo.sam.isDefined) res = res.updated(i, c) + + case _ => + } + } + res + } + /** - * The current inlining heuristics are simple: inline calls to methods annotated @inline. + * Returns the inline request for a callsite if the callsite should be inlined according to the + * current heuristics (`-Yopt-inline-heuristics`). + * + * The resulting inline request may contain post-inlining requests of callsites that in turn are + * also selected as individual inlining requests. */ - def doInlineCallsite(callsite: Callsite): Boolean = callsite match { - case Callsite(_, _, _, Right(Callee(callee, calleeDeclClass, safeToInline, _, annotatedInline, _, warning)), _, _, _, pos) => - if (compilerSettings.YoptInlineHeuristics.value == "everything") safeToInline - else annotatedInline && safeToInline + def inlineRequest(callsite: Callsite): Option[InlineRequest] = compilerSettings.YoptInlineHeuristics.value match { + case "everything" => + if (callsite.callee.get.safeToInline) Some(InlineRequest(callsite, Nil)) + else None + + case "at-inline-annotated" => + val callee = callsite.callee.get + if (callee.safeToInline && callee.annotatedInline) Some(InlineRequest(callsite, Nil)) + else None + +// case "default" => - case _ => false } /* diff --git a/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala b/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala index d727951a6b..135ebe9a78 100644 --- a/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala +++ b/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala @@ -6,6 +6,7 @@ import org.junit.runner.RunWith import org.junit.runners.JUnit4 import org.junit.Test import scala.collection.generic.Clearable +import scala.collection.immutable.IntMap import scala.collection.mutable.ListBuffer import scala.reflect.internal.util.{NoPosition, BatchSourceFile} import scala.tools.asm.Opcodes._ @@ -95,7 +96,7 @@ class InlinerTest extends ClearAfterClass { callsiteInstruction = callsiteInstruction, callsiteMethod = callsiteMethod, callsiteClass = callsiteClass, - callee = Right(callGraph.Callee(callee = callee, calleeDeclarationClass = calleeDeclarationClass, safeToInline = true, safeToRewrite = false, annotatedInline = false, annotatedNoInline = false, calleeInfoWarning = None)), + callee = Right(callGraph.Callee(callee = callee, calleeDeclarationClass = calleeDeclarationClass, safeToInline = true, safeToRewrite = false, annotatedInline = false, annotatedNoInline = false, higherOrderParams = IntMap.empty, calleeInfoWarning = None)), argInfos = Nil, callsiteStackHeight = callsiteStackHeight, receiverKnownNotNull = receiverKnownNotNull, -- cgit v1.2.3 From 89c0a7f0a320e898f360c7c94b1ad0bbce681b3a Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Wed, 26 Aug 2015 17:15:18 +0200 Subject: Store information about function literals in call graph Remember in the call graph if a function literal is passed as an argument to a higher-order function. --- .../tools/nsc/backend/jvm/opt/CallGraph.scala | 62 ++++++++++++++++---- .../nsc/backend/jvm/opt/ClosureOptimizer.scala | 30 +++++----- .../scala/tools/nsc/backend/jvm/opt/Inliner.scala | 16 +++--- .../tools/nsc/backend/jvm/opt/CallGraphTest.scala | 66 +++++++++++++++++++--- .../tools/nsc/backend/jvm/opt/InlinerTest.scala | 2 +- 5 files changed, 135 insertions(+), 41 deletions(-) (limited to 'src') diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala index fcb8991baa..bf19bece8b 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala @@ -16,7 +16,7 @@ import scala.collection.{concurrent, mutable} import scala.collection.convert.decorateAsScala._ import scala.tools.nsc.backend.jvm.BTypes.InternalName import scala.tools.nsc.backend.jvm.BackendReporting._ -import scala.tools.nsc.backend.jvm.analysis.{NotNull, NullnessAnalyzer} +import scala.tools.nsc.backend.jvm.analysis.{ParameterProducer, ProdConsAnalyzer, NotNull, NullnessAnalyzer} import ByteCodeRepository.{Source, CompilationUnit} import BytecodeUtils._ @@ -114,6 +114,9 @@ class CallGraph[BT <: BTypes](val btypes: BT) { var methodCallsites = Map.empty[MethodInsnNode, Callsite] var methodClosureInstantiations = Map.empty[InvokeDynamicInsnNode, ClosureInstantiation] + // lazy so it is only computed if actually used by computeArgInfos + lazy val prodCons = new ProdConsAnalyzer(methodNode, definingClass.internalName) + methodNode.instructions.iterator.asScala foreach { case call: MethodInsnNode => val callee: Either[OptimizerWarning, Callee] = for { @@ -133,13 +136,7 @@ class CallGraph[BT <: BTypes](val btypes: BT) { calleeInfoWarning = warning) } - val argInfos = if (callee.isLeft) Nil else { - // TODO: for now it's Nil, because we don't run any data flow analysis - // there's no point in using the parameter types, that doesn't add any information. - // NOTE: need to run the same analyses after inlining, to re-compute the argInfos for the - // new duplicated callsites, see Inliner.inline - Nil - } + val argInfos = computeArgInfos(callee, call, methodNode, definingClass, Some(prodCons)) val receiverNotNull = call.getOpcode == Opcodes.INVOKESTATIC || { val numArgs = Type.getArgumentTypes(call.desc).length @@ -169,6 +166,50 @@ class CallGraph[BT <: BTypes](val btypes: BT) { callsites(methodNode) = methodCallsites closureInstantiations(methodNode) = methodClosureInstantiations } + + def computeArgInfos( + callee: Either[OptimizerWarning, Callee], + callsiteInsn: MethodInsnNode, callsiteMethod: MethodNode, callsiteClass: ClassBType, + methodProdCons: => Option[ProdConsAnalyzer] = None): IntMap[ArgInfo] = { + if (callee.isLeft) IntMap.empty + else { + if (callee.get.higherOrderParams.nonEmpty) { + + val prodCons = methodProdCons.getOrElse({ + localOpt.minimalRemoveUnreachableCode(callsiteMethod, callsiteClass.internalName) + new ProdConsAnalyzer(callsiteMethod, callsiteClass.internalName) + }) + + // TODO: use type analysis instead - should be more efficient than prodCons + // some random thoughts: + // - assign special types to parameters and indy-lambda-functions to track them + // - upcast should not change type flow analysis: don't lose information. + // - can we do something about factory calls? Foo(x) for case class foo gives a Foo. + // inline the factory? analysis across method boundry? + + lazy val callFrame = prodCons.frameAt(callsiteInsn) + val receiverOrFirstArgSlot = { + val numArgs = Type.getArgumentTypes(callsiteInsn.desc).length + (if (callsiteInsn.getOpcode == Opcodes.INVOKESTATIC) 0 else 1) + callFrame.stackTop - numArgs + 1 + } + callee.get.higherOrderParams flatMap { + case (index, paramType) => + val prods = prodCons.initialProducersForValueAt(callsiteInsn, receiverOrFirstArgSlot + index) + if (prods.size != 1) None + else { + val argInfo = prods.head match { + case LambdaMetaFactoryCall(_, _, _, _) => Some(FunctionLiteral) + case ParameterProducer(local) => Some(ForwardedParam(local)) + case _ => None + } + argInfo.map((index, _)) + } + } + } else { + IntMap.empty + } + } + } /** * Just a named tuple used as return type of `analyzeCallsite`. @@ -254,7 +295,7 @@ class CallGraph[BT <: BTypes](val btypes: BT) { * @param callsitePosition The source position of the callsite, used for inliner warnings. */ final case class Callsite(callsiteInstruction: MethodInsnNode, callsiteMethod: MethodNode, callsiteClass: ClassBType, - callee: Either[OptimizerWarning, Callee], argInfos: List[ArgInfo], + callee: Either[OptimizerWarning, Callee], argInfos: IntMap[ArgInfo], callsiteStackHeight: Int, receiverKnownNotNull: Boolean, callsitePosition: Position) { override def toString = "Invocation of" + @@ -267,8 +308,9 @@ class CallGraph[BT <: BTypes](val btypes: BT) { * Information about invocation arguments, obtained through data flow analysis of the callsite method. */ sealed trait ArgInfo - final case class ArgTypeInfo(argType: BType, isPrecise: Boolean, knownNotNull: Boolean) extends ArgInfo + case object FunctionLiteral extends ArgInfo final case class ForwardedParam(index: Int) extends ArgInfo + // final case class ArgTypeInfo(argType: BType, isPrecise: Boolean, knownNotNull: Boolean) extends ArgInfo // can be extended, e.g., with constant types /** diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala index ba6897b06e..70e203aac1 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala @@ -9,6 +9,7 @@ package opt import scala.annotation.switch import scala.collection.immutable +import scala.collection.immutable.IntMap import scala.reflect.internal.util.NoPosition import scala.tools.asm.{Type, Opcodes} import scala.tools.asm.tree._ @@ -235,24 +236,25 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { // the method node is needed for building the call graph entry val bodyMethod = byteCodeRepository.methodNode(lambdaBodyHandle.getOwner, lambdaBodyHandle.getName, lambdaBodyHandle.getDesc) def bodyMethodIsBeingCompiled = byteCodeRepository.classNodeAndSource(lambdaBodyHandle.getOwner).map(_._2 == CompilationUnit).getOrElse(false) + val callee = bodyMethod.map({ + case (bodyMethodNode, bodyMethodDeclClass) => + val bodyDeclClassType = classBTypeFromParsedClassfile(bodyMethodDeclClass) + Callee( + callee = bodyMethodNode, + calleeDeclarationClass = bodyDeclClassType, + safeToInline = compilerSettings.YoptInlineGlobal || bodyMethodIsBeingCompiled, + safeToRewrite = false, // the lambda body method is not a trait interface method + annotatedInline = false, + annotatedNoInline = false, + inliner.heuristics.higherOrderParams(bodyMethodNode, bodyDeclClassType), + calleeInfoWarning = None) + }) val bodyMethodCallsite = Callsite( callsiteInstruction = bodyInvocation, callsiteMethod = ownerMethod, callsiteClass = closureInit.ownerClass, - callee = bodyMethod.map({ - case (bodyMethodNode, bodyMethodDeclClass) => - val bodyDeclClassType = classBTypeFromParsedClassfile(bodyMethodDeclClass) - Callee( - callee = bodyMethodNode, - calleeDeclarationClass = bodyDeclClassType, - safeToInline = compilerSettings.YoptInlineGlobal || bodyMethodIsBeingCompiled, - safeToRewrite = false, // the lambda body method is not a trait interface method - annotatedInline = false, - annotatedNoInline = false, - inliner.heuristics.higherOrderParams(bodyMethodNode, bodyDeclClassType), - calleeInfoWarning = None) - }), - argInfos = Nil, + callee = callee, + argInfos = computeArgInfos(callee, bodyInvocation, ownerMethod, closureInit.ownerClass), callsiteStackHeight = invocationStackHeight, receiverKnownNotNull = true, // see below (*) callsitePosition = originalCallsite.map(_.callsitePosition).getOrElse(NoPosition) diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala index 265685ad84..2918c33c74 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala @@ -8,6 +8,7 @@ package backend.jvm package opt import scala.annotation.tailrec +import scala.collection.immutable.IntMap import scala.tools.asm import asm.Handle import asm.Opcodes._ @@ -177,7 +178,7 @@ class Inliner[BT <: BTypes](val btypes: BT) { annotatedNoInline = annotatedNoInline, higherOrderParams = staticCallHigherOrderParams, calleeInfoWarning = infoWarning)), - argInfos = Nil, + argInfos = callsite.argInfos, callsiteStackHeight = callsite.callsiteStackHeight, receiverKnownNotNull = callsite.receiverKnownNotNull, callsitePosition = callsite.callsitePosition @@ -410,6 +411,10 @@ class Inliner[BT <: BTypes](val btypes: BT) { callsiteMethod.localVariables.addAll(cloneLocalVariableNodes(callee, labelsMap, callee.name + "_").asJava) callsiteMethod.tryCatchBlocks.addAll(cloneTryCatchBlockNodes(callee, labelsMap).asJava) + callsiteMethod.maxLocals += returnType.getSize + callee.maxLocals + val numStoredArgs = calleeParamTypes.length + (if (isStaticMethod(callee)) 0 else 1) + callsiteMethod.maxStack = math.max(callsiteMethod.maxStack, callee.maxStack + callsiteStackHeight - numStoredArgs) + callGraph.addIfMissing(callee, calleeDeclarationClass) // Add all invocation instructions and closure instantiations that were inlined to the call graph @@ -420,12 +425,11 @@ class Inliner[BT <: BTypes](val btypes: BT) { callsiteMethod = callsiteMethod, callsiteClass = callsiteClass, callee = originalCallsite.callee, - argInfos = Nil, // TODO: re-compute argInfos for new destination (once we actually compute them) + argInfos = computeArgInfos(originalCallsite.callee, newCallsiteIns, callsiteMethod, callsiteClass), // TODO: try to re-build argInfos from the original callsite's callsiteStackHeight = callsiteStackHeight + originalCallsite.callsiteStackHeight, receiverKnownNotNull = originalCallsite.receiverKnownNotNull, callsitePosition = originalCallsite.callsitePosition - ) - ) + )) } callGraph.closureInstantiations(callee).valuesIterator foreach { originalClosureInit => @@ -441,10 +445,6 @@ class Inliner[BT <: BTypes](val btypes: BT) { // Inlining a method body can render some code unreachable, see example above (in runInliner). unreachableCodeEliminated -= callsiteMethod - callsiteMethod.maxLocals += returnType.getSize + callee.maxLocals - val numStoredArgs = calleeParamTypes.length + (if (isStaticMethod(callee)) 0 else 1) - callsiteMethod.maxStack = math.max(callsiteMethod.maxStack, callee.maxStack + callsiteStackHeight - numStoredArgs) - instructionMap } diff --git a/test/junit/scala/tools/nsc/backend/jvm/opt/CallGraphTest.scala b/test/junit/scala/tools/nsc/backend/jvm/opt/CallGraphTest.scala index 715db3f8c2..b518cbdc50 100644 --- a/test/junit/scala/tools/nsc/backend/jvm/opt/CallGraphTest.scala +++ b/test/junit/scala/tools/nsc/backend/jvm/opt/CallGraphTest.scala @@ -6,6 +6,7 @@ import org.junit.runner.RunWith import org.junit.runners.JUnit4 import org.junit.Test import scala.collection.generic.Clearable +import scala.collection.immutable.IntMap import scala.tools.asm.Opcodes._ import org.junit.Assert._ @@ -21,18 +22,31 @@ import AsmUtils._ import BackendReporting._ import scala.collection.convert.decorateAsScala._ +import scala.tools.testing.ClearAfterClass -@RunWith(classOf[JUnit4]) -class CallGraphTest { - val compiler = newCompiler(extraArgs = "-Ybackend:GenBCode -Yopt:inline-global -Yopt-warnings") - import compiler.genBCode.bTypes._ +object CallGraphTest extends ClearAfterClass.Clearable { + var compiler = newCompiler(extraArgs = "-Ybackend:GenBCode -Yopt:inline-global -Yopt-warnings") + def clear(): Unit = { compiler = null } // allows inspecting the caches after a compilation run - val notPerRun: List[Clearable] = List(classBTypeFromInternalName, byteCodeRepository.compilingClasses, byteCodeRepository.parsedClasses, callGraph.callsites) + val notPerRun: List[Clearable] = List( + compiler.genBCode.bTypes.classBTypeFromInternalName, + compiler.genBCode.bTypes.byteCodeRepository.compilingClasses, + compiler.genBCode.bTypes.byteCodeRepository.parsedClasses, + compiler.genBCode.bTypes.callGraph.callsites) notPerRun foreach compiler.perRunCaches.unrecordCache +} + +@RunWith(classOf[JUnit4]) +class CallGraphTest extends ClearAfterClass { + ClearAfterClass.stateToClear = CallGraphTest - def compile(code: String, allowMessage: StoreReporter#Info => Boolean): List[ClassNode] = { - notPerRun.foreach(_.clear()) + val compiler = CallGraphTest.compiler + import compiler.genBCode.bTypes._ + import callGraph._ + + def compile(code: String, allowMessage: StoreReporter#Info => Boolean = _ => false): List[ClassNode] = { + CallGraphTest.notPerRun.foreach(_.clear()) compileClasses(compiler)(code, allowMessage = allowMessage) } @@ -107,7 +121,7 @@ class CallGraphTest { assert(callee.annotatedInline == atInline) assert(callee.annotatedNoInline == atNoInline) - assert(callsite.argInfos == List()) // not defined yet + assert(callsite.argInfos == IntMap.empty) // no higher-order methods } catch { case e: Throwable => println(callsite); throw e } @@ -135,4 +149,40 @@ class CallGraphTest { checkCallsite(df7Call, t2, cf7, cClassBType, false, true, true) checkCallsite(dg1Call, t2, g1, cMClassBType, true, false, false) } + + /** + * NOTE: if this test fails for you when running within the IDE, it's probably because you're + * using 2.12.0-M2 for compilining within the IDE, which doesn't add SAM information to the + * InlineInfo attribute. So the InlineInfo in the classfile for Function1 doesn't say that + * it's a SAM type. The test passes when running with ant (which does a full bootstrap). + */ + @Test + def checkArgInfos(): Unit = { + val code = + """abstract class C { + | def h(f: Int => Int): Int = f(1) + | def t1 = h(x => x + 1) + | def t2(i: Int, f: Int => Int, z: Int) = h(f) + i - z + | def t3(f: Int => Int) = h(x => f(x + 1)) + |} + |abstract class D { + | def iAmASam(x: Int): Int + | def selfSamCall = iAmASam(10) + |} + |""".stripMargin + val List(c, d) = compile(code) + + def callIn(m: String) = callGraph.callsites.find(_._1.name == m).get._2.values.head + val t1h = callIn("t1") + assertEquals(t1h.argInfos.toList, List((1, FunctionLiteral))) + + val t2h = callIn("t2") + assertEquals(t2h.argInfos.toList, List((1, ForwardedParam(2)))) + + val t3h = callIn("t3") + assertEquals(t3h.argInfos.toList, List((1, FunctionLiteral))) + + val selfSamCall = callIn("selfSamCall") + assertEquals(selfSamCall.argInfos.toList, List((0,ForwardedParam(0)))) + } } diff --git a/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala b/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala index 135ebe9a78..6dfdf20587 100644 --- a/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala +++ b/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala @@ -97,7 +97,7 @@ class InlinerTest extends ClearAfterClass { callsiteMethod = callsiteMethod, callsiteClass = callsiteClass, callee = Right(callGraph.Callee(callee = callee, calleeDeclarationClass = calleeDeclarationClass, safeToInline = true, safeToRewrite = false, annotatedInline = false, annotatedNoInline = false, higherOrderParams = IntMap.empty, calleeInfoWarning = None)), - argInfos = Nil, + argInfos = IntMap.empty, callsiteStackHeight = callsiteStackHeight, receiverKnownNotNull = receiverKnownNotNull, callsitePosition = NoPosition), -- cgit v1.2.3 From 9dd5f95f956fbfd1dc4bab0c7ef69cc60bf85f2c Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Fri, 28 Aug 2015 10:35:44 +0200 Subject: Tone down inliner warnings for callsites not annotated @inline Before this change, with `-Yopt-warnings:at-inline-failed` or with the default options, the inliner would emit inline warnings for failed attempts of callsites that are not annotated @inline. A test for this change follows in a later commit, after enabling the new inliner heuristic for higher-order methods. --- src/compiler/scala/tools/nsc/backend/jvm/BackendReporting.scala | 8 +++++--- src/compiler/scala/tools/nsc/settings/ScalaSettings.scala | 4 +++- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/compiler/scala/tools/nsc/backend/jvm/BackendReporting.scala b/src/compiler/scala/tools/nsc/backend/jvm/BackendReporting.scala index 8526f7e0ae..005d01f187 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/BackendReporting.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/BackendReporting.scala @@ -228,7 +228,7 @@ object BackendReporting { def emitWarning(settings: ScalaSettings): Boolean = this match { case _: IllegalAccessInstruction | _: MethodWithHandlerCalledOnNonEmptyStack | _: SynchronizedMethod | _: StrictfpMismatch | _: ResultingMethodTooLarge => - settings.YoptWarningEmitAtInlineFailed + settings.YoptWarnings.contains(settings.YoptWarningsChoices.anyInlineFailed) case IllegalAccessCheckFailed(_, _, _, _, _, cause) => cause.emitWarning(settings) @@ -246,9 +246,11 @@ object BackendReporting { case class ResultingMethodTooLarge(calleeDeclarationClass: InternalName, name: String, descriptor: String, callsiteClass: InternalName, callsiteName: String, callsiteDesc: String) extends CannotInlineWarning + // TODO: this should be a subtype of CannotInlineWarning + // but at the place where it's created (in findIllegalAccess) we don't have the necessary data (calleeName, calleeDescriptor). case object UnknownInvokeDynamicInstruction extends OptimizerWarning { override def toString = "The callee contains an InvokeDynamic instruction with an unknown bootstrap method (not a LambdaMetaFactory)." - def emitWarning(settings: ScalaSettings): Boolean = settings.YoptWarningEmitAtInlineFailed + def emitWarning(settings: ScalaSettings): Boolean = settings.YoptWarnings.contains(settings.YoptWarningsChoices.anyInlineFailed) } /** @@ -260,7 +262,7 @@ object BackendReporting { override def emitWarning(settings: ScalaSettings): Boolean = this match { case RewriteClosureAccessCheckFailed(_, cause) => cause.emitWarning(settings) - case RewriteClosureIllegalAccess(_, _) => settings.YoptWarningEmitAtInlineFailed + case RewriteClosureIllegalAccess(_, _) => settings.YoptWarnings.contains(settings.YoptWarningsChoices.anyInlineFailed) } override def toString: String = this match { diff --git a/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala b/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala index f6223926bf..ea9ad1d909 100644 --- a/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala +++ b/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala @@ -283,6 +283,7 @@ trait ScalaSettings extends AbsScalaSettings val none = Choice("none" , "No optimizer warnings.") val atInlineFailedSummary = Choice("at-inline-failed-summary" , "One-line summary if there were @inline method calls that could not be inlined.") val atInlineFailed = Choice("at-inline-failed" , "A detailed warning for each @inline method call that could not be inlined.") + val anyInlineFailed = Choice("any-inline-failed" , "A detailed warning for every callsite that was chosen for inlining by the heuristics, but could not be inlined.") val noInlineMixed = Choice("no-inline-mixed" , "In mixed compilation, warn at callsites methods defined in java sources (the inlining decision cannot be made without bytecode).") val noInlineMissingBytecode = Choice("no-inline-missing-bytecode" , "Warn if an inlining decision cannot be made because a the bytecode of a class or member cannot be found on the compilation classpath.") val noInlineMissingScalaInlineInfoAttr = Choice("no-inline-missing-attribute", "Warn if an inlining decision cannot be made because a Scala classfile does not have a ScalaInlineInfo attribute.") @@ -301,7 +302,8 @@ trait ScalaSettings extends AbsScalaSettings def YoptWarningEmitAtInlineFailed = !YoptWarnings.isSetByUser || YoptWarnings.contains(YoptWarningsChoices.atInlineFailedSummary) || - YoptWarnings.contains(YoptWarningsChoices.atInlineFailed) + YoptWarnings.contains(YoptWarningsChoices.atInlineFailed) || + YoptWarnings.contains(YoptWarningsChoices.anyInlineFailed) def YoptWarningNoInlineMixed = YoptWarnings.contains(YoptWarningsChoices.noInlineMixed) def YoptWarningNoInlineMissingBytecode = YoptWarnings.contains(YoptWarningsChoices.noInlineMissingBytecode) -- cgit v1.2.3 From c35c9c77a976b06aeae04d15493ec4995a2a6448 Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Thu, 27 Aug 2015 22:12:28 +0200 Subject: Revert workaround for SI-8334 The new optimizer doesn't have this problem. --- .../scala/tools/nsc/backend/jvm/opt/CallGraph.scala | 12 ++++++++++++ src/library/scala/collection/immutable/List.scala | 3 --- test/files/pos/list-optim-check.flags | 1 - test/files/pos/list-optim-check.scala | 21 --------------------- test/files/run/t8334.scala | 17 +++++++++++++++++ 5 files changed, 29 insertions(+), 25 deletions(-) delete mode 100644 test/files/pos/list-optim-check.flags delete mode 100644 test/files/pos/list-optim-check.scala create mode 100644 test/files/run/t8334.scala (limited to 'src') diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala index bf19bece8b..f9ba109358 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala @@ -239,6 +239,17 @@ class CallGraph[BT <: BTypes](val btypes: BT) { // (1) A non-final method can be safe to inline if the receiver type is a final subclass. Example: // class A { @inline def f = 1 }; object B extends A; B.f // can be inlined // + // TODO: (1) doesn't cover the following example: + // trait TravLike { def map = ... } + // sealed trait List extends TravLike { ... } // assume map is not overridden + // final case class :: / final case object Nil + // (l: List).map // can be inlined + // we need to know that + // - the recevier is sealed + // - what are the children of the receiver + // - all children are final + // - none of the children overrides map + // // TODO: type analysis can render more calls statically resolved. Example: // new A.f // can be inlined, the receiver type is known to be exactly A. val isStaticallyResolved: Boolean = { @@ -336,6 +347,7 @@ class CallGraph[BT <: BTypes](val btypes: BT) { higherOrderParams: IntMap[ClassBType], calleeInfoWarning: Option[CalleeInfoWarning]) { assert(!(safeToInline && safeToRewrite), s"A callee of ${callee.name} can be either safeToInline or safeToRewrite, but not both.") + override def toString = s"Callee($calleeDeclarationClass.${callee.name})" } final case class ClosureInstantiation(lambdaMetaFactoryCall: LambdaMetaFactoryCall, ownerMethod: MethodNode, ownerClass: ClassBType) { diff --git a/src/library/scala/collection/immutable/List.scala b/src/library/scala/collection/immutable/List.scala index 7b1997252d..eb095dbbc2 100644 --- a/src/library/scala/collection/immutable/List.scala +++ b/src/library/scala/collection/immutable/List.scala @@ -266,7 +266,6 @@ sealed abstract class List[+A] extends AbstractSeq[A] (b.toList, these) } - @noinline // TODO - fix optimizer bug that requires noinline (see SI-8334) final override def map[B, That](f: A => B)(implicit bf: CanBuildFrom[List[A], B, That]): That = { if (bf eq List.ReusableCBF) { if (this eq Nil) Nil.asInstanceOf[That] else { @@ -285,7 +284,6 @@ sealed abstract class List[+A] extends AbstractSeq[A] else super.map(f) } - @noinline // TODO - fix optimizer bug that requires noinline for map; applied here to be safe (see SI-8334) final override def collect[B, That](pf: PartialFunction[A, B])(implicit bf: CanBuildFrom[List[A], B, That]): That = { if (bf eq List.ReusableCBF) { if (this eq Nil) Nil.asInstanceOf[That] else { @@ -315,7 +313,6 @@ sealed abstract class List[+A] extends AbstractSeq[A] else super.collect(pf) } - @noinline // TODO - fix optimizer bug that requires noinline for map; applied here to be safe (see SI-8334) final override def flatMap[B, That](f: A => GenTraversableOnce[B])(implicit bf: CanBuildFrom[List[A], B, That]): That = { if (bf eq List.ReusableCBF) { if (this eq Nil) Nil.asInstanceOf[That] else { diff --git a/test/files/pos/list-optim-check.flags b/test/files/pos/list-optim-check.flags deleted file mode 100644 index 49d036a887..0000000000 --- a/test/files/pos/list-optim-check.flags +++ /dev/null @@ -1 +0,0 @@ --optimize diff --git a/test/files/pos/list-optim-check.scala b/test/files/pos/list-optim-check.scala deleted file mode 100644 index f6e6ddec77..0000000000 --- a/test/files/pos/list-optim-check.scala +++ /dev/null @@ -1,21 +0,0 @@ -// Tests a map known to crash in optimizer with faster List map in SI-8240. -// Equivalent tests for collect and flatmap do not crash, but are provided -// anyway. -// See ticket SI-8334 for optimizer bug. -// TODO - Remove this test once SI-8334 is fixed and has its own test. -class A { - def f: Boolean = { - val xs = Nil map (_ => return false) - true - } - - def g: Boolean = { - val xs = Nil collect { case _ => return false } - true - } - - def h: Boolean = { - val xs = Nil flatMap { _ => return false } - true - } -} diff --git a/test/files/run/t8334.scala b/test/files/run/t8334.scala new file mode 100644 index 0000000000..bc7e97bd04 --- /dev/null +++ b/test/files/run/t8334.scala @@ -0,0 +1,17 @@ +object Test extends App { + def f: Boolean = { + val xs = Nil map (_ => return false) + true + } + + def g: Boolean = { + val xs = Nil collect { case _ => return false } + true + } + + def h: Boolean = { + val xs = Nil flatMap { _ => return false } + true + } + assert(f && g && h) +} -- cgit v1.2.3 From fef4b3dd5c330f1c2f18604e231ef7c45ac14d70 Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Wed, 9 Sep 2015 15:57:41 +0200 Subject: Reduce component nesting in backend Make InlinerHeuristics a backend component like the others, instead of nested within the Inliner component. --- src/compiler/scala/tools/nsc/backend/jvm/AsmUtils.scala | 7 +++++++ src/compiler/scala/tools/nsc/backend/jvm/BTypes.scala | 4 +++- src/compiler/scala/tools/nsc/backend/jvm/BTypesFromSymbols.scala | 2 ++ src/compiler/scala/tools/nsc/backend/jvm/BackendStats.scala | 1 + src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala | 2 +- .../scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala | 2 +- src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala | 4 +--- test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala | 4 ++-- 8 files changed, 18 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/compiler/scala/tools/nsc/backend/jvm/AsmUtils.scala b/src/compiler/scala/tools/nsc/backend/jvm/AsmUtils.scala index cd7e0b83e8..18a495e5fd 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/AsmUtils.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/AsmUtils.scala @@ -55,6 +55,13 @@ object AsmUtils { node } + def readClass(filename: String): ClassNode = { + val f = new java.io.RandomAccessFile(filename, "r") + val b = new Array[Byte](f.length.toInt) + f.read(b) + readClass(b) + } + /** * Returns a human-readable representation of the cnode ClassNode. */ diff --git a/src/compiler/scala/tools/nsc/backend/jvm/BTypes.scala b/src/compiler/scala/tools/nsc/backend/jvm/BTypes.scala index 12cd9c7f5c..6bc9f76e1a 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/BTypes.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/BTypes.scala @@ -44,6 +44,8 @@ abstract class BTypes { val inliner: Inliner[this.type] + val inlinerHeuristics: InlinerHeuristics[this.type] + val closureOptimizer: ClosureOptimizer[this.type] val callGraph: CallGraph[this.type] @@ -234,7 +236,7 @@ abstract class BTypes { InlineInfo( traitImplClassSelfType = None, isEffectivelyFinal = BytecodeUtils.isFinalClass(classNode), - sam = inliner.heuristics.javaSam(classNode.name), + sam = inlinerHeuristics.javaSam(classNode.name), methodInfos = methodInfos, warning) } diff --git a/src/compiler/scala/tools/nsc/backend/jvm/BTypesFromSymbols.scala b/src/compiler/scala/tools/nsc/backend/jvm/BTypesFromSymbols.scala index 160c1283f1..dc87ed631d 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/BTypesFromSymbols.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/BTypesFromSymbols.scala @@ -42,6 +42,8 @@ class BTypesFromSymbols[G <: Global](val global: G) extends BTypes { val inliner: Inliner[this.type] = new Inliner(this) + val inlinerHeuristics: InlinerHeuristics[this.type] = new InlinerHeuristics(this) + val closureOptimizer: ClosureOptimizer[this.type] = new ClosureOptimizer(this) val callGraph: CallGraph[this.type] = new CallGraph(this) diff --git a/src/compiler/scala/tools/nsc/backend/jvm/BackendStats.scala b/src/compiler/scala/tools/nsc/backend/jvm/BackendStats.scala index 03306f30aa..8d0547b607 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/BackendStats.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/BackendStats.scala @@ -8,6 +8,7 @@ package backend.jvm import scala.reflect.internal.util.Statistics +// Enable with `-Ystatistics:jvm` object BackendStats { import Statistics.{newTimer, newSubTimer} val bcodeTimer = newTimer("time in backend", "jvm") diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala index f9ba109358..7a0b41a49a 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala @@ -277,7 +277,7 @@ class CallGraph[BT <: BTypes](val btypes: BT) { safeToRewrite = canInlineFromSource && isRewritableTraitCall, // (2) annotatedInline = methodInlineInfo.annotatedInline, annotatedNoInline = methodInlineInfo.annotatedNoInline, - higherOrderParams = inliner.heuristics.higherOrderParams(calleeMethodNode, receiverType), + higherOrderParams = inlinerHeuristics.higherOrderParams(calleeMethodNode, receiverType), warning = warning) case None => diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala index 70e203aac1..a509bed5c5 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala @@ -246,7 +246,7 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { safeToRewrite = false, // the lambda body method is not a trait interface method annotatedInline = false, annotatedNoInline = false, - inliner.heuristics.higherOrderParams(bodyMethodNode, bodyDeclClassType), + inlinerHeuristics.higherOrderParams(bodyMethodNode, bodyDeclClassType), calleeInfoWarning = None) }) val bodyMethodCallsite = Callsite( diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala index 2918c33c74..144a899c03 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala @@ -25,9 +25,7 @@ import scala.tools.nsc.backend.jvm.BTypes.InternalName class Inliner[BT <: BTypes](val btypes: BT) { import btypes._ import callGraph._ - - val heuristics: InlinerHeuristics[btypes.type] = new InlinerHeuristics(btypes) - import heuristics._ + import inlinerHeuristics._ def eliminateUnreachableCodeAndUpdateCallGraph(methodNode: MethodNode, definingClass: InternalName): Unit = { localOpt.minimalRemoveUnreachableCode(methodNode, definingClass) foreach { diff --git a/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala b/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala index 6dfdf20587..a462b4ff0a 100644 --- a/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala +++ b/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala @@ -91,7 +91,7 @@ class InlinerTest extends ClearAfterClass { def makeInlineRequest( callsiteInstruction: MethodInsnNode, callsiteMethod: MethodNode, callsiteClass: ClassBType, callee: MethodNode, calleeDeclarationClass: ClassBType, callsiteStackHeight: Int, receiverKnownNotNull: Boolean, - post: List[inliner.heuristics.PostInlineRequest] = Nil) = inliner.heuristics.InlineRequest( + post: List[inlinerHeuristics.PostInlineRequest] = Nil) = inlinerHeuristics.InlineRequest( callsite = callGraph.Callsite( callsiteInstruction = callsiteInstruction, callsiteMethod = callsiteMethod, @@ -1093,7 +1093,7 @@ class InlinerTest extends ClearAfterClass { calleeDeclarationClass = cTp, callsiteStackHeight = analyzer.frameAt(gCall).getStackSize, receiverKnownNotNull = false, - post = List(inliner.heuristics.PostInlineRequest(fCall, Nil)) + post = List(inlinerHeuristics.PostInlineRequest(fCall, Nil)) ) val r = inliner.inline(request) -- cgit v1.2.3 From a477c40f13644dbd596126c238dbdc262639b002 Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Thu, 17 Sep 2015 20:18:01 +0200 Subject: In the call graph, rename higherOrderParams to samParamTypes Because that's what the field holds: the parameter types that are SAM types. --- .../tools/nsc/backend/jvm/opt/CallGraph.scala | 43 ++++++++++++++++++---- .../nsc/backend/jvm/opt/ClosureOptimizer.scala | 2 +- .../scala/tools/nsc/backend/jvm/opt/Inliner.scala | 10 ++--- .../nsc/backend/jvm/opt/InlinerHeuristics.scala | 18 --------- .../tools/nsc/backend/jvm/opt/InlinerTest.scala | 2 +- 5 files changed, 42 insertions(+), 33 deletions(-) (limited to 'src') diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala index 7a0b41a49a..6442b81721 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala @@ -124,7 +124,7 @@ class CallGraph[BT <: BTypes](val btypes: BT) { (declarationClassNode, source) <- byteCodeRepository.classNodeAndSource(declarationClass): Either[OptimizerWarning, (ClassNode, Source)] declarationClassBType = classBTypeFromClassNode(declarationClassNode) } yield { - val CallsiteInfo(safeToInline, safeToRewrite, annotatedInline, annotatedNoInline, higherOrderParams, warning) = analyzeCallsite(method, declarationClassBType, call.owner, source) + val CallsiteInfo(safeToInline, safeToRewrite, annotatedInline, annotatedNoInline, samParamTypes, warning) = analyzeCallsite(method, declarationClassBType, call.owner, source) Callee( callee = method, calleeDeclarationClass = declarationClassBType, @@ -132,7 +132,7 @@ class CallGraph[BT <: BTypes](val btypes: BT) { safeToRewrite = safeToRewrite, annotatedInline = annotatedInline, annotatedNoInline = annotatedNoInline, - higherOrderParams = higherOrderParams, + samParamTypes = samParamTypes, calleeInfoWarning = warning) } @@ -173,7 +173,7 @@ class CallGraph[BT <: BTypes](val btypes: BT) { methodProdCons: => Option[ProdConsAnalyzer] = None): IntMap[ArgInfo] = { if (callee.isLeft) IntMap.empty else { - if (callee.get.higherOrderParams.nonEmpty) { + if (callee.get.samParamTypes.nonEmpty) { val prodCons = methodProdCons.getOrElse({ localOpt.minimalRemoveUnreachableCode(callsiteMethod, callsiteClass.internalName) @@ -192,7 +192,7 @@ class CallGraph[BT <: BTypes](val btypes: BT) { val numArgs = Type.getArgumentTypes(callsiteInsn.desc).length + (if (callsiteInsn.getOpcode == Opcodes.INVOKESTATIC) 0 else 1) callFrame.stackTop - numArgs + 1 } - callee.get.higherOrderParams flatMap { + callee.get.samParamTypes flatMap { case (index, paramType) => val prods = prodCons.initialProducersForValueAt(callsiteInsn, receiverOrFirstArgSlot + index) if (prods.size != 1) None @@ -211,12 +211,39 @@ class CallGraph[BT <: BTypes](val btypes: BT) { } } + def samParamTypes(methodNode: MethodNode, receiverType: ClassBType): IntMap[ClassBType] = { + val paramTypes = { + val params = Type.getMethodType(methodNode.desc).getArgumentTypes.map(t => bTypeForDescriptorOrInternalNameFromClassfile(t.getDescriptor)) + val isStatic = BytecodeUtils.isStaticMethod(methodNode) + if (isStatic) params else receiverType +: params + } + samTypes(paramTypes) + } + + def capturedSamTypes(lmf: LambdaMetaFactoryCall): IntMap[ClassBType] = { + val capturedTypes = Type.getArgumentTypes(lmf.indy.desc).map(t => bTypeForDescriptorOrInternalNameFromClassfile(t.getDescriptor)) + samTypes(capturedTypes) + } + + private def samTypes(types: Array[BType]): IntMap[ClassBType] = { + var res = IntMap.empty[ClassBType] + for (i <- types.indices) { + types(i) match { + case c: ClassBType => + if (c.info.get.inlineInfo.sam.isDefined) res = res.updated(i, c) + + case _ => + } + } + res + } + /** * Just a named tuple used as return type of `analyzeCallsite`. */ private case class CallsiteInfo(safeToInline: Boolean, safeToRewrite: Boolean, annotatedInline: Boolean, annotatedNoInline: Boolean, - higherOrderParams: IntMap[ClassBType], + samParamTypes: IntMap[ClassBType], warning: Option[CalleeInfoWarning]) /** @@ -277,7 +304,7 @@ class CallGraph[BT <: BTypes](val btypes: BT) { safeToRewrite = canInlineFromSource && isRewritableTraitCall, // (2) annotatedInline = methodInlineInfo.annotatedInline, annotatedNoInline = methodInlineInfo.annotatedNoInline, - higherOrderParams = inlinerHeuristics.higherOrderParams(calleeMethodNode, receiverType), + samParamTypes = samParamTypes(calleeMethodNode, receiverType), warning = warning) case None => @@ -337,14 +364,14 @@ class CallGraph[BT <: BTypes](val btypes: BT) { * that can be safely re-written to the static implementation method. * @param annotatedInline True if the callee is annotated @inline * @param annotatedNoInline True if the callee is annotated @noinline - * @param higherOrderParams A map from parameter positions to SAM parameter types + * @param samParamTypes A map from parameter positions to SAM parameter types * @param calleeInfoWarning An inliner warning if some information was not available while * gathering the information about this callee. */ final case class Callee(callee: MethodNode, calleeDeclarationClass: ClassBType, safeToInline: Boolean, safeToRewrite: Boolean, annotatedInline: Boolean, annotatedNoInline: Boolean, - higherOrderParams: IntMap[ClassBType], + samParamTypes: IntMap[ClassBType], calleeInfoWarning: Option[CalleeInfoWarning]) { assert(!(safeToInline && safeToRewrite), s"A callee of ${callee.name} can be either safeToInline or safeToRewrite, but not both.") override def toString = s"Callee($calleeDeclarationClass.${callee.name})" diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala index a509bed5c5..f211c00c80 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala @@ -246,7 +246,7 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { safeToRewrite = false, // the lambda body method is not a trait interface method annotatedInline = false, annotatedNoInline = false, - inlinerHeuristics.higherOrderParams(bodyMethodNode, bodyDeclClassType), + samParamTypes = callGraph.samParamTypes(bodyMethodNode, bodyDeclClassType), calleeInfoWarning = None) }) val bodyMethodCallsite = Callsite( diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala index 144a899c03..1b017bb0da 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala @@ -112,7 +112,7 @@ class Inliner[BT <: BTypes](val btypes: BT) { */ def rewriteFinalTraitMethodInvocation(callsite: Callsite): Unit = { if (doRewriteTraitCallsite(callsite)) { - val Right(Callee(callee, calleeDeclarationClass, _, _, annotatedInline, annotatedNoInline, higherOrderParams, infoWarning)) = callsite.callee + val Right(Callee(callee, calleeDeclarationClass, _, _, annotatedInline, annotatedNoInline, samParamTypes, infoWarning)) = callsite.callee val traitMethodArgumentTypes = asm.Type.getArgumentTypes(callee.desc) @@ -159,9 +159,9 @@ class Inliner[BT <: BTypes](val btypes: BT) { callsite.callsiteMethod.instructions.remove(callsite.callsiteInstruction) callGraph.removeCallsite(callsite.callsiteInstruction, callsite.callsiteMethod) - val staticCallHigherOrderParams = { - if (selfParamType.info.get.inlineInfo.sam.isEmpty) higherOrderParams - 0 - else higherOrderParams.updated(0, selfParamType) + val staticCallSamParamTypes = { + if (selfParamType.info.get.inlineInfo.sam.isEmpty) samParamTypes - 0 + else samParamTypes.updated(0, selfParamType) } val staticCallsite = Callsite( callsiteInstruction = newCallsiteInstruction, @@ -174,7 +174,7 @@ class Inliner[BT <: BTypes](val btypes: BT) { safeToRewrite = false, annotatedInline = annotatedInline, annotatedNoInline = annotatedNoInline, - higherOrderParams = staticCallHigherOrderParams, + samParamTypes = staticCallSamParamTypes, calleeInfoWarning = infoWarning)), argInfos = callsite.argInfos, callsiteStackHeight = callsite.callsiteStackHeight, diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala index 7cfc797317..cd10094204 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala @@ -67,24 +67,6 @@ class InlinerHeuristics[BT <: BTypes](val bTypes: BT) { }).filterNot(_._2.isEmpty).toMap } - def higherOrderParams(methodNode: MethodNode, receiverType: ClassBType): IntMap[ClassBType] = { - var res = IntMap.empty[ClassBType] - val paramTypes = { - val params = Type.getMethodType(methodNode.desc).getArgumentTypes.map(t => bTypeForDescriptorOrInternalNameFromClassfile(t.getDescriptor)) - val isStatic = BytecodeUtils.isStaticMethod(methodNode) - if (isStatic) params else receiverType +: params - } - for (i <- paramTypes.indices) { - paramTypes(i) match { - case c: ClassBType => - if (c.info.get.inlineInfo.sam.isDefined) res = res.updated(i, c) - - case _ => - } - } - res - } - /** * Returns the inline request for a callsite if the callsite should be inlined according to the * current heuristics (`-Yopt-inline-heuristics`). diff --git a/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala b/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala index a462b4ff0a..aa7184ffd8 100644 --- a/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala +++ b/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala @@ -96,7 +96,7 @@ class InlinerTest extends ClearAfterClass { callsiteInstruction = callsiteInstruction, callsiteMethod = callsiteMethod, callsiteClass = callsiteClass, - callee = Right(callGraph.Callee(callee = callee, calleeDeclarationClass = calleeDeclarationClass, safeToInline = true, safeToRewrite = false, annotatedInline = false, annotatedNoInline = false, higherOrderParams = IntMap.empty, calleeInfoWarning = None)), + callee = Right(callGraph.Callee(callee = callee, calleeDeclarationClass = calleeDeclarationClass, safeToInline = true, safeToRewrite = false, annotatedInline = false, annotatedNoInline = false, samParamTypes = IntMap.empty, calleeInfoWarning = None)), argInfos = IntMap.empty, callsiteStackHeight = callsiteStackHeight, receiverKnownNotNull = receiverKnownNotNull, -- cgit v1.2.3 From 4129a03ecc2f953e8b79ec1a2a545a96efb057b4 Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Wed, 9 Sep 2015 16:44:38 +0200 Subject: Avoid re-computing argInfos after inlining and closure optimization The call graph holds an argInfos field for every callsite, which contains additional information about the argument passed to the callee. For example, an argInfo specifies if the argument is a function literal or a parameter of the callsite method. Computing the argInfo requires running an ASM analyzer, which is not cheap. This change assembles the argInfos for callsites that are created or changed by the inliner / closure optimizer from the available information instead of just running the analyzer again. --- .../tools/nsc/backend/jvm/opt/CallGraph.scala | 97 ++++++++++++---------- .../nsc/backend/jvm/opt/ClosureOptimizer.scala | 9 +- .../scala/tools/nsc/backend/jvm/opt/Inliner.scala | 17 +++- .../tools/nsc/backend/jvm/opt/CallGraphTest.scala | 24 ++++++ 4 files changed, 98 insertions(+), 49 deletions(-) (limited to 'src') diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala index 6442b81721..5eb4d033e6 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala @@ -136,7 +136,7 @@ class CallGraph[BT <: BTypes](val btypes: BT) { calleeInfoWarning = warning) } - val argInfos = computeArgInfos(callee, call, methodNode, definingClass, Some(prodCons)) + val argInfos = computeArgInfos(callee, call, prodCons) val receiverNotNull = call.getOpcode == Opcodes.INVOKESTATIC || { val numArgs = Type.getArgumentTypes(call.desc).length @@ -155,10 +155,13 @@ class CallGraph[BT <: BTypes](val btypes: BT) { ) case LambdaMetaFactoryCall(indy, samMethodType, implMethod, instantiatedMethodType) => + val lmf = LambdaMetaFactoryCall(indy, samMethodType, implMethod, instantiatedMethodType) + val capturedArgInfos = computeCapturedArgInfos(lmf, prodCons) methodClosureInstantiations += indy -> ClosureInstantiation( - LambdaMetaFactoryCall(indy, samMethodType, implMethod, instantiatedMethodType), + lmf, methodNode, - definingClass) + definingClass, + capturedArgInfos) case _ => } @@ -166,48 +169,47 @@ class CallGraph[BT <: BTypes](val btypes: BT) { callsites(methodNode) = methodCallsites closureInstantiations(methodNode) = methodClosureInstantiations } - - def computeArgInfos( - callee: Either[OptimizerWarning, Callee], - callsiteInsn: MethodInsnNode, callsiteMethod: MethodNode, callsiteClass: ClassBType, - methodProdCons: => Option[ProdConsAnalyzer] = None): IntMap[ArgInfo] = { + + def computeArgInfos(callee: Either[OptimizerWarning, Callee], callsiteInsn: MethodInsnNode, prodCons: => ProdConsAnalyzer): IntMap[ArgInfo] = { if (callee.isLeft) IntMap.empty else { - if (callee.get.samParamTypes.nonEmpty) { - - val prodCons = methodProdCons.getOrElse({ - localOpt.minimalRemoveUnreachableCode(callsiteMethod, callsiteClass.internalName) - new ProdConsAnalyzer(callsiteMethod, callsiteClass.internalName) - }) - - // TODO: use type analysis instead - should be more efficient than prodCons - // some random thoughts: - // - assign special types to parameters and indy-lambda-functions to track them - // - upcast should not change type flow analysis: don't lose information. - // - can we do something about factory calls? Foo(x) for case class foo gives a Foo. - // inline the factory? analysis across method boundry? - - lazy val callFrame = prodCons.frameAt(callsiteInsn) - val receiverOrFirstArgSlot = { - val numArgs = Type.getArgumentTypes(callsiteInsn.desc).length + (if (callsiteInsn.getOpcode == Opcodes.INVOKESTATIC) 0 else 1) - callFrame.stackTop - numArgs + 1 - } - callee.get.samParamTypes flatMap { - case (index, paramType) => - val prods = prodCons.initialProducersForValueAt(callsiteInsn, receiverOrFirstArgSlot + index) - if (prods.size != 1) None - else { - val argInfo = prods.head match { - case LambdaMetaFactoryCall(_, _, _, _) => Some(FunctionLiteral) - case ParameterProducer(local) => Some(ForwardedParam(local)) - case _ => None - } - argInfo.map((index, _)) - } + lazy val numArgs = Type.getArgumentTypes(callsiteInsn.desc).length + (if (callsiteInsn.getOpcode == Opcodes.INVOKESTATIC) 0 else 1) + argInfosForSams(callee.get.samParamTypes, callsiteInsn, numArgs, prodCons) + } + } + + def computeCapturedArgInfos(lmf: LambdaMetaFactoryCall, prodCons: => ProdConsAnalyzer): IntMap[ArgInfo] = { + val capturedSams = capturedSamTypes(lmf) + val numCaptures = Type.getArgumentTypes(lmf.indy.desc).length + argInfosForSams(capturedSams, lmf.indy, numCaptures, prodCons) + } + + private def argInfosForSams(sams: IntMap[ClassBType], consumerInsn: AbstractInsnNode, numConsumed: => Int, prodCons: => ProdConsAnalyzer): IntMap[ArgInfo] = { + // TODO: use type analysis instead of ProdCons - should be more efficient + // some random thoughts: + // - assign special types to parameters and indy-lambda-functions to track them + // - upcast should not change type flow analysis: don't lose information. + // - can we do something about factory calls? Foo(x) for case class foo gives a Foo. + // inline the factory? analysis across method boundary? + + // assign to a lazy val to prevent repeated evaluation of the by-name arg + lazy val prodConsI = prodCons + lazy val firstConsumedSlot = { + val consumerFrame = prodConsI.frameAt(consumerInsn) + consumerFrame.stackTop - numConsumed + 1 + } + sams flatMap { + case (index, _) => + val prods = prodConsI.initialProducersForValueAt(consumerInsn, firstConsumedSlot + index) + if (prods.size != 1) None + else { + val argInfo = prods.head match { + case LambdaMetaFactoryCall(_, _, _, _) => Some(FunctionLiteral) + case ParameterProducer(local) => Some(ForwardedParam(local)) + case _ => None + } + argInfo.map((index, _)) } - } else { - IntMap.empty - } } } @@ -377,7 +379,16 @@ class CallGraph[BT <: BTypes](val btypes: BT) { override def toString = s"Callee($calleeDeclarationClass.${callee.name})" } - final case class ClosureInstantiation(lambdaMetaFactoryCall: LambdaMetaFactoryCall, ownerMethod: MethodNode, ownerClass: ClassBType) { + /** + * Metadata about a closure instantiation, stored in the call graph + * + * @param lambdaMetaFactoryCall the InvokeDynamic instruction + * @param ownerMethod the method where the closure is allocated + * @param ownerClass the class containing the above method + * @param capturedArgInfos information about captured arguments. Used for updating the call + * graph when re-writing a closure invocation to the body method. + */ + final case class ClosureInstantiation(lambdaMetaFactoryCall: LambdaMetaFactoryCall, ownerMethod: MethodNode, ownerClass: ClassBType, capturedArgInfos: IntMap[ArgInfo]) { override def toString = s"ClosureInstantiation($lambdaMetaFactoryCall, ${ownerMethod.name + ownerMethod.desc}, $ownerClass)" } final case class LambdaMetaFactoryCall(indy: InvokeDynamicInsnNode, samMethodType: Type, implMethod: Handle, instantiatedMethodType: Type) diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala index f211c00c80..30b7f2edad 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala @@ -204,8 +204,8 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { insertLoadOps(invocation, ownerMethod, argumentLocalsList) // update maxStack - val capturesStackSize = localsForCapturedValues.size - val invocationStackHeight = stackHeight + capturesStackSize - 1 // -1 because the closure is gone + val numCapturedValues = localsForCapturedValues.locals.length // not `localsForCapturedValues.size`: every value takes 1 slot on the stack (also long / double), JVMS 2.6.2 + val invocationStackHeight = stackHeight + numCapturedValues - 1 // -1 because the closure is gone if (invocationStackHeight > ownerMethod.maxStack) ownerMethod.maxStack = invocationStackHeight @@ -249,12 +249,15 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { samParamTypes = callGraph.samParamTypes(bodyMethodNode, bodyDeclClassType), calleeInfoWarning = None) }) + val argInfos = closureInit.capturedArgInfos ++ originalCallsite.map(cs => cs.argInfos map { + case (index, info) => (index + numCapturedValues, info) + }).getOrElse(IntMap.empty) val bodyMethodCallsite = Callsite( callsiteInstruction = bodyInvocation, callsiteMethod = ownerMethod, callsiteClass = closureInit.ownerClass, callee = callee, - argInfos = computeArgInfos(callee, bodyInvocation, ownerMethod, closureInit.ownerClass), + argInfos = argInfos, callsiteStackHeight = invocationStackHeight, receiverKnownNotNull = true, // see below (*) callsitePosition = originalCallsite.map(_.callsitePosition).getOrElse(NoPosition) diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala index 1b017bb0da..1550f942c7 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala @@ -410,20 +410,26 @@ class Inliner[BT <: BTypes](val btypes: BT) { callsiteMethod.tryCatchBlocks.addAll(cloneTryCatchBlockNodes(callee, labelsMap).asJava) callsiteMethod.maxLocals += returnType.getSize + callee.maxLocals - val numStoredArgs = calleeParamTypes.length + (if (isStaticMethod(callee)) 0 else 1) + val numStoredArgs = calleeParamTypes.length + (if (isStaticMethod(callee)) 0 else 1) // every value takes 1 slot on the stack (also long / double), JVMS 2.6.2 callsiteMethod.maxStack = math.max(callsiteMethod.maxStack, callee.maxStack + callsiteStackHeight - numStoredArgs) callGraph.addIfMissing(callee, calleeDeclarationClass) + def mapArgInfo(argInfo: (Int, ArgInfo)): Option[(Int, ArgInfo)] = argInfo match { + case lit @ (_, FunctionLiteral) => Some(lit) + case (argIndex, ForwardedParam(paramIndex)) => callsite.argInfos.get(paramIndex).map((argIndex, _)) + } + // Add all invocation instructions and closure instantiations that were inlined to the call graph callGraph.callsites(callee).valuesIterator foreach { originalCallsite => val newCallsiteIns = instructionMap(originalCallsite.callsiteInstruction).asInstanceOf[MethodInsnNode] + val argInfos = originalCallsite.argInfos flatMap mapArgInfo callGraph.addCallsite(Callsite( callsiteInstruction = newCallsiteIns, callsiteMethod = callsiteMethod, callsiteClass = callsiteClass, callee = originalCallsite.callee, - argInfos = computeArgInfos(originalCallsite.callee, newCallsiteIns, callsiteMethod, callsiteClass), // TODO: try to re-build argInfos from the original callsite's + argInfos = argInfos, callsiteStackHeight = callsiteStackHeight + originalCallsite.callsiteStackHeight, receiverKnownNotNull = originalCallsite.receiverKnownNotNull, callsitePosition = originalCallsite.callsitePosition @@ -432,8 +438,13 @@ class Inliner[BT <: BTypes](val btypes: BT) { callGraph.closureInstantiations(callee).valuesIterator foreach { originalClosureInit => val newIndy = instructionMap(originalClosureInit.lambdaMetaFactoryCall.indy).asInstanceOf[InvokeDynamicInsnNode] + val capturedArgInfos = originalClosureInit.capturedArgInfos flatMap mapArgInfo callGraph.addClosureInstantiation( - ClosureInstantiation(originalClosureInit.lambdaMetaFactoryCall.copy(indy = newIndy), callsiteMethod, callsiteClass) + ClosureInstantiation( + originalClosureInit.lambdaMetaFactoryCall.copy(indy = newIndy), + callsiteMethod, + callsiteClass, + capturedArgInfos) ) } diff --git a/test/junit/scala/tools/nsc/backend/jvm/opt/CallGraphTest.scala b/test/junit/scala/tools/nsc/backend/jvm/opt/CallGraphTest.scala index b518cbdc50..995e008912 100644 --- a/test/junit/scala/tools/nsc/backend/jvm/opt/CallGraphTest.scala +++ b/test/junit/scala/tools/nsc/backend/jvm/opt/CallGraphTest.scala @@ -185,4 +185,28 @@ class CallGraphTest extends ClearAfterClass { val selfSamCall = callIn("selfSamCall") assertEquals(selfSamCall.argInfos.toList, List((0,ForwardedParam(0)))) } + + @Test + def argInfoAfterInlining(): Unit = { + val code = + """class C { + | def foo(f: Int => Int) = f(1) // not inlined + | @inline final def bar(g: Int => Int) = foo(g) // forwarded param 1 + | @inline final def baz = foo(x => x + 1) // literal + | + | def t1 = bar(x => x + 1) // call to foo should have argInfo literal + | def t2(x: Int, f: Int => Int) = x + bar(f) // call to foo should have argInfo forwarded param 2 + | def t3 = baz // call to foo should have argInfo literal + | def someFun: Int => Int = null + | def t4(x: Int) = x + bar(someFun) // call to foo has empty argInfo + |} + """.stripMargin + + compile(code) + def callIn(m: String) = callGraph.callsites.find(_._1.name == m).get._2.values.head + assertEquals(callIn("t1").argInfos.toList, List((1, FunctionLiteral))) + assertEquals(callIn("t2").argInfos.toList, List((1, ForwardedParam(2)))) + assertEquals(callIn("t3").argInfos.toList, List((1, FunctionLiteral))) + assertEquals(callIn("t4").argInfos.toList, Nil) + } } -- cgit v1.2.3 From 382015824cfb3d1af2eb0dc3065b305e93185577 Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Mon, 14 Sep 2015 11:45:21 +0200 Subject: Remove stack values from aliasing set when being dropped When entering an exception handler, all values are dropped from the stack (and the exception value is pushed). The ASM analyzer handles this by invoking `firstHandlerInstructionFrame.clearStack()`. To ensure consistent aliasing sets, we need to remove the dropped values from aliasing sets. --- .../tools/nsc/backend/jvm/analysis/AliasingFrame.scala | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'src') diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala index 7bbe1e2a49..6ae6eddfef 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala @@ -204,6 +204,21 @@ class AliasingFrame[V <: Value](nLocals: Int, nStack: Int) extends Frame[V](nLoc } } + /** + * When entering an exception handler, all values are dropped from the stack (and the exception + * value is pushed). The ASM analyzer invokes `firstHandlerInstructionFrame.clearStack()`. To + * ensure consistent aliasing sets, we need to remove the dropped values from aliasing sets. + */ + override def clearStack(): Unit = { + var i = getLocals + val end = i + getStackSize + while (i < end) { + removeAlias(i) + i += 1 + } + super.clearStack() + } + /** * Merge the AliasingFrame `other` into this AliasingFrame. * -- cgit v1.2.3 From 72e1d0567ec011da5c519378c8883a492e67a25b Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Wed, 9 Sep 2015 17:14:17 +0200 Subject: Improve the performance of analyses tracking aliases Plenty of optimizations to make aliasing analysis faster. I cannot say how much faster: an analysis that used to not terminate now takes one second. The lessons are not new: - avoid boxing (of course) - avoid allocating in tight loops - use arrays, avoid hash sets / maps - specialize for common cases (see SmallBitSet in this commit) - avoid megamorphic calls - understand the details of the problem and find those twists that make it run faster More explanations in the code. --- .../nsc/backend/jvm/analysis/AliasingFrame.scala | 492 +++++++++++++++++++-- .../backend/jvm/analysis/NullnessAnalyzer.scala | 85 +--- .../backend/jvm/analysis/ProdConsAnalyzer.scala | 5 + .../tools/nsc/backend/jvm/analysis/package.scala | 329 ++++++++++++++ 4 files changed, 796 insertions(+), 115 deletions(-) create mode 100644 src/compiler/scala/tools/nsc/backend/jvm/analysis/package.scala (limited to 'src') diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala index 6ae6eddfef..9e5fbfcc0e 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala @@ -3,17 +3,22 @@ package backend.jvm package analysis import scala.annotation.switch -import scala.collection.{mutable, immutable} +import scala.collection.mutable import scala.tools.asm.Opcodes import scala.tools.asm.tree._ import scala.tools.asm.tree.analysis.{Analyzer, Value, Frame, Interpreter} import opt.BytecodeUtils._ +import AliasSet.SmallBitSet -object AliasingFrame { - private var _idCounter: Long = 0l - private def nextId = { _idCounter += 1; _idCounter } -} - +/** + * A subclass of Frame that tracks aliasing of values stored in local variables and on the stack. + * + * Note: an analysis tracking aliases is roughly 5x slower than a usual analysis (assuming a simple + * value domain with a fast merge function). For example, nullness analysis is roughly 5x slower + * than a BasicValue analysis. + * + * See the doc of package object `analysis` for some notes on the performance of alias analysis. + */ class AliasingFrame[V <: Value](nLocals: Int, nStack: Int) extends Frame[V](nLocals, nStack) { import Opcodes._ @@ -24,50 +29,66 @@ class AliasingFrame[V <: Value](nLocals: Int, nStack: Int) extends Frame[V](nLoc } /** - * For each slot (entry in the `values` array of the frame), an id that uniquely represents - * the object stored in it. If two values have the same id, they are aliases of the same - * object. - */ - private val aliasIds: Array[Long] = Array.fill(nLocals + nStack)(AliasingFrame.nextId) - - /** - * The object alias id of for a value index. - */ - def aliasId(entry: Int) = aliasIds(entry) - - /** - * Returns the indices of the values array which are aliases of the object `id`. + * For every value the set of values that are aliases of it. + * + * Invariants: + * - If `aliases(i) == null` then i has no aliases. This is equivalent to having + * `aliases(i) == SingletonSet(i)`. + * - If `aliases(i) != null` then `aliases(i) contains i`. + * - If `aliases(i) contains j` then `aliases(i) eq aliases(j)`, i.e., they are references to the + * same (mutable) AliasSet. */ - def valuesWithAliasId(id: Long): Set[Int] = immutable.BitSet.empty ++ aliasIds.indices.iterator.filter(i => aliasId(i) == id) + val aliases: Array[AliasSet] = new Array[AliasSet](getLocals + getMaxStackSize) /** * The set of aliased values for a given entry in the `values` array. */ - def aliasesOf(entry: Int): Set[Int] = valuesWithAliasId(aliasIds(entry)) + def aliasesOf(entry: Int): AliasSet = { + if (aliases(entry) != null) aliases(entry) + else { + val init = new AliasSet(new AliasSet.SmallBitSet(entry, -1, -1, -1), 1) + aliases(entry) = init + init + } + } /** - * Define a new alias. For example, given - * var a = this // this, a have the same aliasId - * then an assignment + * Define a new alias. For example, an assignment * b = a - * will set the same the aliasId for `b`. + * adds b to the set of aliases of a. */ private def newAlias(assignee: Int, source: Int): Unit = { - aliasIds(assignee) = aliasIds(source) + removeAlias(assignee) + val sourceAliases = aliasesOf(source) + sourceAliases += assignee + aliases(assignee) = sourceAliases } /** - * An assignment + * Remove an alias. For example, an assignment * a = someUnknownValue() - * sets a fresh alias id for `a`. - * A stack value is also removed from its alias set when being consumed. + * removes a from its former alias set. + * As another example, stack values are removed from their alias sets when being consumed. */ private def removeAlias(assignee: Int): Unit = { - aliasIds(assignee) = AliasingFrame.nextId + if (aliases(assignee) != null) { + aliases(assignee) -= assignee + aliases(assignee) = null + } + } + + /** + * Define the alias set for a given value. + */ + private def setAliasSet(assignee: Int, set: AliasSet): Unit = { + if (aliases(assignee) != null) { + aliases(assignee) -= assignee + } + aliases(assignee) = set } override def execute(insn: AbstractInsnNode, interpreter: Interpreter[V]): Unit = { - // Make the extendsion methods easier to use (otherwise we have to repeat `this`.stackTop) + // Make the extension methods easier to use (otherwise we have to repeat `this`.stackTop) def stackTop: Int = this.stackTop def peekStack(n: Int): V = this.peekStack(n) @@ -166,14 +187,34 @@ class AliasingFrame[V <: Value](nLocals: Int, nStack: Int) extends Frame[V](nLoc } case SWAP => + // could be written more elegantly with higher-order combinators, but thinking of performance val top = stackTop - val idTop = aliasIds(top) - aliasIds(top) = aliasIds(top - 1) - aliasIds(top - 1) = idTop + + def moveNextToTop(): Unit = { + val nextAliases = aliases(top - 1) + aliases(top) = nextAliases + nextAliases -= (top - 1) + nextAliases += top + } + + if (aliases(top) != null) { + val topAliases = aliases(top) + if (aliases(top - 1) != null) moveNextToTop() + else aliases(top) = null + // move top to next + aliases(top - 1) = topAliases + topAliases -= top + topAliases += (top - 1) + } else { + if (aliases(top - 1) != null) { + moveNextToTop() + aliases(top - 1) = null + } + } case opcode => if (opcode == ASTORE) { - // Not a separate case because we need to remove the consumed stack value from alias sets after. + // not a separate case: we re-use the code below that removes the consumed stack value from alias sets val stackTopBefore = stackTop - produced + consumed val local = insn.asInstanceOf[VarInsnNode].`var` newAlias(assignee = local, source = stackTopBefore) @@ -198,9 +239,6 @@ class AliasingFrame[V <: Value](nLocals: Int, nStack: Int) extends Frame[V](nLoc val firstConsumed = stackTop - produced + 1 // firstConsumed = 3 for (i <- 0 until consumed) removeAlias(firstConsumed + i) // remove aliases for 3 and 4 - - // We don't need to set the aliases ids for the produced values: the aliasIds array already - // contains fresh ids for non-used stack values (ensured by removeAlias). } } @@ -232,30 +270,124 @@ class AliasingFrame[V <: Value](nLocals: Int, nStack: Int) extends Frame[V](nLoc * x = a * y = b // (x, a) and (y, b) * } - * [...] // (x, a) + * [...] // (x, a) -- merge of ((x, y, a)) and ((x, a), (y, b)) */ override def merge(other: Frame[_ <: V], interpreter: Interpreter[V]): Boolean = { + // merge is the main performance hot spot of a data flow analysis. + + // in nullness analysis, super.merge (which actually merges the nullness values) takes 20% of + // the overall analysis time. val valuesChanged = super.merge(other, interpreter) + + // in nullness analysis, merging the alias sets takes ~55% of the analysis time. therefore, this + // code has been heavily optimized. most of the time is spent in the `hasNext` method of the + // andNotIterator, see its comment. + var aliasesChanged = false val aliasingOther = other.asInstanceOf[AliasingFrame[_]] - for (i <- aliasIds.indices) { - val thisAliases = aliasesOf(i) - val thisNotOther = thisAliases diff (thisAliases intersect aliasingOther.aliasesOf(i)) - if (thisNotOther.nonEmpty) { - aliasesChanged = true - thisNotOther foreach removeAlias + + val numValues = getLocals + getStackSize + // assume (a, b) are aliases both in this frame, and the other frame. when merging the alias set + // for a, we already see that a and b will be aliases in the final result. so we can skip over + // merging the alias set for b. in this case, while merging the sets for a, knownOk(b) will be + // set to `true`. + val knownOk = new Array[Boolean](numValues) + var i = 0 + while (i < numValues) { + if (!knownOk(i)) { + val thisAliases = this.aliases(i) + val otherAliases = aliasingOther.aliases(i) + if (thisAliases != null && otherAliases != null) { + // The iterator yields elements that are in `thisAliases` but not in `otherAliases`. + // As a side-effect, for every index `i` that is in both alias sets, the iterator sets + // `knownOk(i) = true`: the alias sets for these values don't need to be merged again. + val thisNotOtherIt = AliasSet.andNotIterator(thisAliases, otherAliases, knownOk) + if (thisNotOtherIt.hasNext) { + aliasesChanged = true + val newSet = AliasSet.empty + while (thisNotOtherIt.hasNext) { + val next = thisNotOtherIt.next() + newSet += next + setAliasSet(next, newSet) + } + } + } } + i += 1 } + valuesChanged || aliasesChanged } + private def min(s: SmallBitSet) = { + var r = s.a + if ( s.b < r) r = s.b + if (s.c != -1 && s.c < r) r = s.c + if (s.d != -1 && s.d < r) r = s.d + r + } + override def init(src: Frame[_ <: V]): Frame[V] = { - super.init(src) - compat.Platform.arraycopy(src.asInstanceOf[AliasingFrame[_]].aliasIds, 0, aliasIds, 0, aliasIds.length) + super.init(src) // very quick (just an arraycopy) + System.arraycopy(src.asInstanceOf[AliasingFrame[_]].aliases, 0, aliases, 0, aliases.length) // also quick + + val newSets = mutable.HashMap.empty[AliasSet, AliasSet] + + // the rest of this method (cloning alias sets) is the second performance˙hotspot (next to + // AliasingFrame.merge). for nullness, it takes ~20% of the analysis time. + // the difficulty here is that we have to clone the alias sets correctly. if two values a, b are + // aliases, then aliases(a) eq aliases(b). we need to make sure to use the same clone for the + // two values. + + var i = 0 + while (i < aliases.length) { + val set = aliases(i) + if (set != null) { + // size cannot be 0 - alias sets are always at least singletons. + // for sets of size 1-4, don't use the `newSets` map - lookup / update is slow + if (set.size == 1) { + aliases(i) = null + } else if (set.size <= 4) { + val small = set.set.asInstanceOf[AliasSet.SmallBitSet] + val firstOfSet = i == min(small) + if (firstOfSet) { + val newSet = set.clone() + aliases(small.a) = newSet + aliases(small.b) = newSet + if (small.c != -1) aliases(small.c) = newSet + if (small.d != -1) aliases(small.d) = newSet + } + } else { + // the actual hot spot is the hash map operations here: this is where almost all of the 20% + // mentioned above is spent. + // i also benchmarked an alternative implementation: keep an array of booleans for indexes + // that already contain the cloned set. iterate through all elements of the cloned set and + // assign the cloned set. this approach is 50% slower than using a hash map. + if (newSets contains set) aliases(i) = newSets(set) + else { + val newSet = set.clone() + newSets(set) = newSet + aliases(i) = newSet + } + } + } + i += 1 + } this } } +object AliasingFrame { +// val start1 = AliasingFrame.timer1.start() +// AliasingFrame.timer1.stop(start1) + import scala.reflect.internal.util.Statistics._ + val timer1 = newTimer("t1", "jvm") + val timer2 = newTimer("t2", "jvm") + val timer3 = newTimer("t3", "jvm") + val timers = List(timer1, timer2, timer3) + def reset(): Unit = for (t <- timers) { t.nanos = 0; t.timings = 0 } +} + /** * An analyzer that uses AliasingFrames instead of bare Frames. This can be used when an analysis * needs to track aliases, but doesn't require a more specific Frame subclass. @@ -264,3 +396,269 @@ class AliasingAnalyzer[V <: Value](interpreter: Interpreter[V]) extends Analyzer override def newFrame(nLocals: Int, nStack: Int): AliasingFrame[V] = new AliasingFrame(nLocals, nStack) override def newFrame(src: Frame[_ <: V]): AliasingFrame[V] = new AliasingFrame(src) } + +/** + * An iterator over Int (required to prevent boxing the result of next). + */ +abstract class IntIterator extends Iterator[Int] { + def hasNext: Boolean + def next(): Int +} + +/** + * An efficient mutable bit set. + * + * @param set Either a SmallBitSet or an Array[Long] + * @param size The size of the set, useful for performance of certain operations + */ +class AliasSet(var set: Object /*SmallBitSet | Array[Long]*/, var size: Int) { + import AliasSet._ + + override def toString: String = set.toString + + /** + * An iterator for the elements of this bit set. Note that only one iterator can be used at a + * time. Also make sure not to change the underlying AliasSet during iteration. + */ + def iterator: IntIterator = andNotIterator(this, empty, null) + + def +=(value: Int): Unit = this.set match { + case s: SmallBitSet => (size: @switch) match { + case 0 => s.a = value; size = 1 + case 1 => if (value != s.a) { s.b = value; size = 2 } + case 2 => if (value != s.a && value != s.b) { s.c = value; size = 3 } + case 3 => if (value != s.a && value != s.b && value != s.c) { s.d = value; size = 4 } + case 4 => + if (value != s.a && value != s.b && value != s.c && value != s.d) { + this.set = bsEmpty + this.size = 0 + bsAdd(this, s.a) + bsAdd(this, s.b) + bsAdd(this, s.c) + bsAdd(this, s.d) + bsAdd(this, value) + } + } + case bits: Array[Long] => + bsAdd(this, value) + } + + def -=(value: Int): Unit = this.set match { + case s: SmallBitSet => (size: @switch) match { + case 0 => + case 1 => + if (value == s.a) { s.a = -1; size = 0 } + case 2 => + if (value == s.a) { s.a = s.b; s.b = -1; size = 1 } + else if (value == s.b) { s.b = -1; size = 1 } + case 3 => + if (value == s.a) { s.a = s.b; s.b = s.c; s.c = -1; size = 2 } + else if (value == s.b) { s.b = s.c; s.c = -1; size = 2 } + else if (value == s.c) { s.c = -1; size = 2 } + case 4 => + if (value == s.a) { s.a = s.b; s.b = s.c; s.c = s.d; s.d = -1; size = 3 } + else if (value == s.b) { s.b = s.c; s.c = s.d; s.d = -1; size = 3 } + else if (value == s.c) { s.c = s.d; s.d = -1; size = 3 } + else if (value == s.d) { s.d = -1; size = 3 } + } + case bits: Array[Long] => + bsRemove(this, value) + if (this.size == 4) + this.set = bsToSmall(this.set.asInstanceOf[Array[Long]]) + } + + override def clone(): AliasSet = { + val resSet = this.set match { + case s: SmallBitSet => new SmallBitSet(s.a, s.b, s.c, s.d) + case bits: Array[Long] => bits.clone() + } + new AliasSet(resSet, this.size) + } +} + +object AliasSet { + def empty = new AliasSet(new SmallBitSet(-1, -1, -1, -1), 0) + + final class SmallBitSet(var a: Int, var b: Int, var c: Int, var d: Int) { + override def toString = s"($a, $b, $c, $d)" + } + + def bsEmpty: Array[Long] = new Array[Long](1) + + private def bsEnsureCapacity(set: Array[Long], index: Int): Array[Long] = { + if (index < set.length) set + else { + var newLength = set.length + while (index >= newLength) newLength *= 2 + val newSet = new Array[Long](newLength) + Array.copy(set, 0, newSet, 0, set.length) + newSet + } + } + + def bsAdd(set: AliasSet, bit: Int): Unit = { + val bits = set.set.asInstanceOf[Array[Long]] + val index = bit >> 6 + val resSet = bsEnsureCapacity(bits, index) + val before = resSet(index) + val result = before | (1l << bit) + if (result != before) { + resSet(index) = result + set.set = resSet + set.size += 1 + } + } + + def bsRemove(set: AliasSet, bit: Int): Unit = { + val bits = set.set.asInstanceOf[Array[Long]] + val index = bit >> 6 + if (index < bits.length) { + val before = bits(index) + val result = before & ~(1l << bit) + if (result != before) { + bits(index) = result + set.size -= 1 + } + } + } + + def bsContains(set: Array[Long], bit: Int): Boolean = { + val index = bit >> 6 + bit >= 0 && index < set.length && (set(index) & (1L << bit)) != 0L + } + +// var sizesHist: Array[Int] = new Array[Int](1000) + + /** + * Convert a bit array to a SmallBitSet. Requires the bit array to contain exactly four bits. + */ + def bsToSmall(bits: Array[Long]): SmallBitSet = { + var a = -1 + var b = -1 + var c = -1 + var i = 0 + val end = bits.length * 64 + while (i < end) { + if (bsContains(bits, i)) { + if (a == -1) a = i + else if (b == -1) b = i + else if (c == -1) c = i + else return new SmallBitSet(a, b, c, i) + } + i += 1 + } + null + } + + /** + * An iterator that yields the elements that are in one bit set and not in another (&~). + */ + private class AndNotIt(setA: AliasSet, setB: AliasSet, thisAndOther: Array[Boolean]) extends IntIterator { + // values in the first bit set + private var a, b, c, d = -1 + private var xs: Array[Long] = null + + // values in the second bit set + private var notA, notB, notC, notD = -1 + private var notXs: Array[Long] = null + + // holds the next value of `x`, `y` or `z` that should be returned. assigned in hasNext + private var abcdNext = -1 + + // counts through elements in the `xs` bit set + private var i = 0 + // true if the current value of `i` should be returned by this iterator + private var iValid = false + + setA.set match { + case s: SmallBitSet => a = s.a; b = s.b; c = s.c; d = s.d + case bits: Array[Long] => xs = bits + } + + setB.set match { + case s: SmallBitSet => notA = s.a; notB = s.b; notC = s.c; notD = s.d + case bits: Array[Long] => notXs = bits + } + + // for each value that exists both in this AND (&) the other bit, `thisAndOther` is set to true. + // hacky side-effect, used for performance of AliasingFrame.merge. + private def setThisAndOther(x: Int) = if (thisAndOther != null) thisAndOther(x) = true + + private def checkABCD(x: Int, num: Int): Boolean = { + // assert(x == a && num == 1 || x == b && num == 2 || ...) + x != -1 && { + val otherHasA = x == notA || x == notB || x == notC || x == notD || (notXs != null && bsContains(notXs, x)) + if (otherHasA) setThisAndOther(x) + else abcdNext = x + (num: @switch) match { + case 1 => a = -1 + case 2 => b = -1 + case 3 => c = -1 + case 4 => d = -1 + } + !otherHasA + } + } + + // main performance hot spot + private def checkXs = { + (xs != null) && { + val end = xs.length * 64 + + while (i < end && { + val index = i >> 6 + if (xs(index) == 0l) { // boom. for nullness, this saves 35% of the overall analysis time. + i = ((index + 1) << 6) - 1 // -1 required because i is incremented in the loop body + true + } else { + val mask = 1l << i + // if (mask > xs(index)) we could also advance i to the next value, but that didn't pay off in benchmarks + val thisHasI = (xs(index) & mask) != 0l + !thisHasI || { + val otherHasI = i == notA || i == notB || i == notC || i == notD || (notXs != null && index < notXs.length && (notXs(index) & mask) != 0l) + if (otherHasI) setThisAndOther(i) + otherHasI + } + } + }) i += 1 + + iValid = i < end + iValid + } + } + + // this is the main hot spot of alias analysis. for nullness, 38% of the overall analysis time + // is spent here. within hasNext, almost the entire time is spent in `checkXs`. + // + def hasNext: Boolean = iValid || abcdNext != -1 || checkABCD(a, 1) || checkABCD(b, 2) || checkABCD(c, 3) || checkABCD(d, 4) || checkXs + + def next(): Int = { + if (hasNext) { + if (abcdNext != -1) { + val r = abcdNext; abcdNext = -1; r + } else { + val r = i; i += 1; iValid = false; r + } + } else Iterator.empty.next() + } + } + +// The number of bits in a bit array. Useful for debugging. +// def bsSize(bits: Array[Long]) = { +// var r = 0 +// var i = 0 +// while (i < bits.length) { +// r += java.lang.Long.bitCount(bits(i)) +// i += 1 +// } +// r +// } + + /** + * An iterator returning the elements in a that are not also in b (a &~ b). + * + * If `thisAndOther` is non-null, the iterator sets thisAndOther(i) to true for every value that + * is both in a and b (&). + */ + def andNotIterator(a: AliasSet, b: AliasSet, thisAndOther: Array[Boolean]): IntIterator = new AndNotIt(a, b, thisAndOther) +} diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzer.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzer.scala index 31710dcbee..f6d249db7b 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzer.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzer.scala @@ -7,66 +7,12 @@ import java.util import scala.annotation.switch import scala.tools.asm.{Type, Opcodes} import scala.tools.asm.tree.{MethodInsnNode, LdcInsnNode, AbstractInsnNode} -import scala.tools.asm.tree.analysis.{Frame, Analyzer, Interpreter, Value} +import scala.tools.asm.tree.analysis._ import scala.tools.nsc.backend.jvm.opt.BytecodeUtils import BytecodeUtils._ /** - * Some notes on the ASM ananlyzer framework. - * - * Value - * - Abstract, needs to be implemented for each analysis. - * - Represents the desired information about local variables and stack values, for example: - * - Is this value known to be null / not null? - * - What are the instructions that could potentially have produced this value? - * - * Interpreter - * - Abstract, needs to be implemented for each analysis. Sometimes one can subclass an existing - * interpreter, e.g., SourceInterpreter or BasicInterpreter. - * - Multiple abstract methods that receive an instruction and the instruction's input values, and - * return a value representing the result of that instruction. - * - Note: due to control flow, the interpreter can be invoked multiple times for the same - * instruction, until reaching a fixed point. - * - Abstract `merge` function that computes the least upper bound of two values. Used by - * Frame.merge (see below). - * - * Frame - * - Can be used directly for many analyses, no subclass required. - * - Every frame has an array of values: one for each local variable and for each stack slot. - * - A `top` index stores the index of the current stack top - * - NOTE: for a size-2 local variable at index i, the local variable at i+1 is set to an empty - * value. However, for a size-2 value at index i on the stack, the value at i+1 holds the next - * stack value. - * - Defines the `execute(instruction)` method. - * - executing mutates the state of the frame according to the effect of the instruction - * - pop consumed values from the stack - * - pass them to the interpreter together with the instruction - * - if applicable, push the resulting value on the stack - * - Defines the `merge(otherFrame)` method - * - called by the analyzer when multiple control flow paths lead to an instruction - * - the frame at the branching instruction is merged into the current frame of the - * instruction (held by the analyzer) - * - mutates the values of the current frame, merges all values using interpreter.merge. - * - * Analyzer - * - Stores a frame for each instruction - * - `merge` function takes an instruction and a frame, merges the existing frame for that instr - * (from the frames array) with the new frame passed as argument. - * if the frame changed, puts the instruction on the work queue (fixpiont). - * - initial frame: initialized for first instr by calling interpreter.new[...]Value - * for each slot (locals and params), stored in frames[firstInstr] by calling `merge` - * - work queue of instructions (`queue` array, `top` index for next instruction to analyze) - * - analyze(method): simulate control flow. while work queue non-empty: - * - copy the state of `frames[instr]` into a local frame `current` - * - call `current.execute(instr, interpreter)`, mutating the `current` frame - * - if it's a branching instruction - * - for all potential destination instructions - * - merge the destination instruction frame with the `current` frame - * (this enqueues the destination instr if its frame changed) - * - invoke `newControlFlowEdge` (see below) - * - the analyzer also tracks active exception handlers at each instruction - * - the empty method `newControlFlowEdge` can be overridden to track control flow if required - * + * See the package object `analysis` for details on the ASM analysis framework. * * Some notes on nullness analysis. * @@ -219,8 +165,10 @@ class NullnessFrame(nLocals: Int, nStack: Int) extends AliasingFrame[NullnessVal override def execute(insn: AbstractInsnNode, interpreter: Interpreter[NullnessValue]): Unit = { import Opcodes._ - // get the object id of the object that is known to be not-null after this operation - val nullCheckedAliasId: Long = (insn.getOpcode: @switch) match { + // get the alias set the object that is known to be not-null after this operation. + // alias sets are mutable / mutated, so after super.execute, this set contains the remaining + // aliases of the value that becomes not-null. + val nullCheckedAliases: AliasSet = (insn.getOpcode: @switch) match { case IALOAD | LALOAD | FALOAD | @@ -229,7 +177,7 @@ class NullnessFrame(nLocals: Int, nStack: Int) extends AliasingFrame[NullnessVal BALOAD | CALOAD | SALOAD => - aliasId(this.stackTop - 1) + aliasesOf(this.stackTop - 1) case IASTORE | FASTORE | @@ -239,35 +187,36 @@ class NullnessFrame(nLocals: Int, nStack: Int) extends AliasingFrame[NullnessVal SASTORE | LASTORE | DASTORE => - aliasId(this.stackTop - 2) + aliasesOf(this.stackTop - 2) case GETFIELD => - aliasId(this.stackTop) + aliasesOf(this.stackTop) case PUTFIELD => - aliasId(this.stackTop - 1) + aliasesOf(this.stackTop - 1) case INVOKEVIRTUAL | INVOKESPECIAL | INVOKEINTERFACE => val desc = insn.asInstanceOf[MethodInsnNode].desc val numArgs = Type.getArgumentTypes(desc).length - aliasId(this.stackTop - numArgs) + aliasesOf(this.stackTop - numArgs) case ARRAYLENGTH | MONITORENTER | MONITOREXIT => - aliasId(this.stackTop) + aliasesOf(this.stackTop) case _ => - -1 + null } super.execute(insn, interpreter) - if (nullCheckedAliasId != -1) { - for (i <- valuesWithAliasId(nullCheckedAliasId)) - this.setValue(i, NotNullValue) + if (nullCheckedAliases != null) { + val it = nullCheckedAliases.iterator + while (it.hasNext) + this.setValue(it.next(), NotNullValue) } } } diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzer.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzer.scala index 1c24acba03..f2d8dc910a 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzer.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzer.scala @@ -55,6 +55,11 @@ import scala.collection.convert.decorateAsScala._ * * If ever needed, we could introduce a mode where primitive conversions (l2i) are considered as * copying operations. + * + * Note on performance: thee data flow analysis (SourceValue / SourceInterpreter, provided by ASM) + * is roughly 2-3x slower than a simple analysis (like BasicValue). The reason is that the merge + * function (merging producer sets) is more complex than merging simple basic values. + * See also the doc comment in the package object `analysis`. */ class ProdConsAnalyzer(methodNode: MethodNode, classInternalName: InternalName) { diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/package.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/package.scala new file mode 100644 index 0000000000..402357c55b --- /dev/null +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/package.scala @@ -0,0 +1,329 @@ +package scala.tools.nsc.backend.jvm + +/** + * Summary on the ASM ananlyzer framework + * -------------------------------------- + * + * Value + * - Abstract, needs to be implemented for each analysis. + * - Represents the desired information about local variables and stack values, for example: + * - Is this value known to be null / not null? + * - What are the instructions that could potentially have produced this value? + * + * Interpreter + * - Abstract, needs to be implemented for each analysis. Sometimes one can subclass an existing + * interpreter, e.g., SourceInterpreter or BasicInterpreter. + * - Multiple abstract methods that receive an instruction and the instruction's input values, and + * return a value representing the result of that instruction. + * - Note: due to control flow, the interpreter can be invoked multiple times for the same + * instruction, until reaching a fixed point. + * - Abstract `merge` function that computes the least upper bound of two values. Used by + * Frame.merge (see below). + * + * Frame + * - Can be used directly for many analyses, no subclass required. + * - Every frame has an array of values: one for each local variable and for each stack slot. + * - A `top` index stores the index of the current stack top + * - NOTE: for a size-2 local variable at index i, the local variable at i+1 is set to an empty + * value. However, for a size-2 value at index i on the stack, the value at i+1 holds the next + * stack value. + * - Defines the `execute(instruction)` method. + * - executing mutates the state of the frame according to the effect of the instruction + * - pop consumed values from the stack + * - pass them to the interpreter together with the instruction + * - if applicable, push the resulting value on the stack + * - Defines the `merge(otherFrame)` method + * - called by the analyzer when multiple control flow paths lead to an instruction + * - the frame at the branching instruction is merged into the current frame of the + * instruction (held by the analyzer) + * - mutates the values of the current frame, merges all values using interpreter.merge. + * + * Analyzer + * - Stores a frame for each instruction + * - `merge` function takes an instruction and a frame, merges the existing frame for that instr + * (from the frames array) with the new frame passed as argument. + * if the frame changed, puts the instruction on the work queue (fixpiont). + * - initial frame: initialized for first instr by calling interpreter.new[...]Value + * for each slot (locals and params), stored in frames[firstInstr] by calling `merge` + * - work queue of instructions (`queue` array, `top` index for next instruction to analyze) + * - analyze(method): simulate control flow. while work queue non-empty: + * - copy the state of `frames[instr]` into a local frame `current` + * - call `current.execute(instr, interpreter)`, mutating the `current` frame + * - if it's a branching instruction + * - for all potential destination instructions + * - merge the destination instruction frame with the `current` frame + * (this enqueues the destination instr if its frame changed) + * - invoke `newControlFlowEdge` (see below) + * - the analyzer also tracks active exception handlers at each instruction + * - the empty method `newControlFlowEdge` can be overridden to track control flow if required + * + * + * Lessons learnt while benchmarking the alias tracking analysis + * ------------------------------------------------------------- + * + * Profiling + * - Use YourKit for finding hotspots (cpu profiling). when it comes to drilling down into the details + * of a hotspot, don't pay too much attention to the percentages / time counts. + * - Should also try other profilers. + * - Use timers. When a method showed up as a hotspot, i added a timer around that method, and a + * second one within the method to measure specific parts. The timers slow things down, but the + * relative numbers show what parts of a method are slow. + * + * ASM analyzer insights + * - The time for running an analysis depends on the number of locals and the number of instructions. + * Reducing the number of locals helps speeding up the analysis: there are less values to + * merge when merging to frames. + * See also https://github.com/scala/scala-dev/issues/47 + * - The common hot spot of an ASM analysis is Frame.merge, for example in producers / consumers. + * - For nullness analysis the time is spent as follows + * - 20% merging nullness values. this is as expected: for example, the same absolute amount of + * time is spent in merging BasicValues when running a BasicInterpreter. + * - 50% merging alias sets. i tried to optimize what i could out of this. + * - 20% is spent creating new frames from existing ones, see comment on AliasingFrame.init. + * - The implementation of Frame.merge (the main hot spot) contains a megamorphic callsite to + * `interpreter.merge`. This can be observed easily by running a test program that either runs + * a BasicValue analysis only, versus a program that first runs a nullness analysis and then + * a BasicValue. In an example, the time for the BasicValue analysis goes from 519ms to 1963ms, + * a 3.8x slowdown. + * - I added counters to the Frame.merge methods for nullness and BasicValue analysis. In the + * examples I benchmarked, the number of merge invocations was always exactly the same. + * It would probably be possible to come up with an example where alias set merging forces + * additional analysis rounds until reaching the fixpoint, but I did not observe such cases. + * + * To benchmark an analysis, instead of benchmarking analysis while it runs in the compiler + * backend, one can easily run it from a separate program (or the repl). The bytecode to analyze + * can simply be parsed from a classfile. See example at the end of this comment. + * + * + * Nullness Analysis in Miguel's Optimizer + * --------------------------------------- + * + * Miguel implemented alias tracking for nullness analysis differently [1]. Remember that every + * frame has an array of values. Miguel's idea was to represent aliasing using reference equality + * in the values array: if two entries in the array point to the same value object, the two entries + * are aliases in the frame of the given instruction. + * + * While this idea seems elegant at first sight, Miguel's implementation does not merge frames + * correctly when it comes to aliasing. Assume in frame 1, values (a, b, c) are aliases, while in + * frame 2 (a, b) are aliases. When merging the second into the first, we have to make sure that + * c is removed as an alias of (a, b). + * + * It would be possible to implement correct alias set merging in Miguel's approach. However, frame + * merging is the main hot spot of analysis. The computational complexity of implementing alias set + * merging by traversing the values array and comparing references is too high. The concrete + * alias set representation that is used in the current implementation (see class AliasingFrame) + * makes alias set merging more efficient. + * + * [1] https://github.com/scala-opt/scala/blob/opt/rebase/src/compiler/scala/tools/nsc/backend/bcode/NullnessPropagator.java + * + * + * Complexity and scaling of analysis + * ---------------------------------- + * + * The time complexity of a data flow analysis depends on: + * + * - The size of the method. The complexity factor is linear (assuming the number of locals and + * branching instructions remains constant). The main analysis loop runs through all + * instructions of a method once. Instructions are only re-enqueued if a control flow merge + * changes the frame at some instruction. + * + * - The branching instructions. When a second (third, ..) control flow edge arrives at an + * instruction, the existing frame at the instruction is merged with the one computed on the + * new branch. If the merge function changes the existing frame, the instruction is enqueued + * for another analysis. This results in a merge operation for the successors of the + * instruction. + * + * - The number of local variables. The hot spot of analysis is frame merging. The merge function + * iterates through the values in the frame (locals and stack values) and merges them. + * + * I measured the running time of an analysis for two examples: + * - Keep the number of locals and branching instructions constant, increase the number of + * instructions. The running time grows linearly with the method size. + * - Increase the size and number of locals in a method. The method size and number of locals + * grow in the same pace. Here, the running time increase is polynomial. It looks like the + * complexity is be #instructions * #locals^2 (see below). + * + * I measured nullness analysis (which tracks aliases) and a SimpleValue analysis. Nullness runs + * roughly 5x slower (because of alias tracking) at every problem size - this factor doesn't change. + * + * The numbers below are for nullness. Note that the the last column is constant, i.e., the running + * time is proportional to #ins * #loc^2. Therefore we use this factor when limiting the maximal + * method size for running an analysis. + * + * #insns #locals time (ms) time / #ins * #loc^2 * 10^6 + * 1305 156 34 1.07 + * 2610 311 165 0.65 + * 3915 466 490 0.57 + * 5220 621 1200 0.59 + * 6525 776 2220 0.56 + * 7830 931 3830 0.56 + * 9135 1086 6570 0.60 + * 10440 1241 9700 0.60 + * 11745 1396 13800 0.60 + * + * As a second experiment, nullness analysis was run with varying #insns but constant #locals. + * The last column shows linear complexity with respect to the method size (linearOffset = 2279): + * + * #insns #locals time (ms) (time + linearOffset) / #insns + * 5220 621 1090 0.645 + * 6224 621 1690 0.637 + * 7226 621 2280 0.630 + * 8228 621 2870 0.625 + * 9230 621 3530 0.629 + * 10232 621 4130 0.626 + * 11234 621 4770 0.627 + * 12236 621 5520 0.637 + * 13238 621 6170 0.638 + * + * + * When running a BasicValue analysis, the complexity observation is the same (time is proportional + * to #ins * #loc^2). + * + * + * Measuring analysis execution time + * --------------------------------- + * + * See code below. + */ + +/* +object Test { + val overwrite: Option[String] = null + + @noinline def serialize(o: AnyRef): String = null + + @noinline def deserialize(string: String): AnyRef = null + + @inline def checkRoundTrip[T <: AnyRef](instance: T)(f: T => AnyRef) { + val result = serialize(instance) + val reconstituted = deserialize(result).asInstanceOf[T] + assert(f(instance) == f(reconstituted), (f(instance), f(reconstituted))) + } + + @inline def check[T <: AnyRef](instance: => T)(prevResult: String, f: T => AnyRef = (x: T) => x) { + // pattern match to introduce a lot of control flow, i.e., a lot of frame merges + overwrite match { + case Some(f) => + case None => + checkRoundTrip(instance)(f) + assert(f(deserialize(prevResult).asInstanceOf[T]) == f(instance), instance) + assert(prevResult == "res", instance) + } + } + + // @inline def fun[T <: AnyRef](instance: => T) = (x: T) => x + + def testMain(): Unit = { + // every call to check creates quite a number of locals, and also quite a number of aliases + // of the same value (x1). First of all, the default argument call is expanded as below. Then + // method check is inlined, and within the body of check, checkRoundTrip and assert have + // already been inlined as well. + + // { + // val x1 = () => "" + // val x2 = fun(x1()) // the compiler optimizes this: instead of passing `() => x1()`, it just passes x1 + // check(x1())("", x2) // same here for x1 + // } + + check("")("") + check("")("") + check("")("") + check("")("") + check("")("") // 5 + check("")("") + check("")("") + check("")("") + check("")("") + check("")("") // 10 + check("")("") + check("")("") + check("")("") + check("")("") + check("")("") // 15 + check("")("") + check("")("") + check("")("") + check("")("") + check("")("") // 20 + check("")("") + check("")("") + check("")("") + check("")("") + check("")("") // 25 + check("")("") + check("")("") + check("")("") + check("")("") + check("")("") // 30 + check("")("") + check("")("") + check("")("") + check("")("") + check("")("") // 35 + check("")("") + check("")("") + check("")("") + check("")("") + check("")("") // 40 + // check("")("") + // check("")("") + // check("")("") + // check("")("") + // check("")("") // 45 + // check("")("") + // check("")("") + // check("")("") + // check("")("") + // check("")("") // 50 + // check("")("") + // check("")("") + // check("")("") + // check("")("") + // check("")("") // 55 + + // 1000 bytecode instructions, 0 locals + // println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); + } + + def timed[T](f: => T): T = { + val start = System.nanoTime() + val r = f + val nanos = System.nanoTime() - start + println(s"took ${nanos/1000000}ms") + r + } + + def main(args: Array[String]): Unit = { + import scala.tools.nsc.backend.jvm._ + val cn = AsmUtils.readClass("/Users/luc/scala/scala/sandbox/Test$.class") + import scala.collection.convert.decorateAsScala._ + val m = cn.methods.iterator.asScala.find(_.name == "testMain").head + + println(s"${m.instructions.size} instructions - ${m.maxLocals} locals") + + val a = new analysis.NullnessAnalyzer + a.analyze(cn.name, m) // warm up + + analysis.AliasingFrame.reset() + timed(a.analyze(cn.name, m)) + analysis.AliasingFrame.timers foreach println + + println("---") + + // NOTE: if we don't run nullness analysis above (comment it out), then the BasicValue + // analysis runs 3.5x faster. Most likely because the call to Interpreter.merge inside + // Frame.merge is no longer megamorphic. + + import scala.tools.asm.tree.analysis._ + val ba = new Analyzer(new BasicInterpreter) + ba.analyze(cn.name, m) // warm up + + timed(ba.analyze(cn.name, m)) + + println("---") + + timed(a.analyze(cn.name, m)) + } +} +*/ +package object analysis -- cgit v1.2.3 From 2a9466723833caae0ec7b5c48953917b62a2e910 Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Wed, 9 Sep 2015 17:16:57 +0200 Subject: Cleanups and performance fixes in Nullness analysis --- .../backend/jvm/analysis/NullnessAnalyzer.scala | 86 +++++++------------ .../tools/nsc/backend/jvm/opt/CallGraph.scala | 4 +- .../jvm/analysis/NullnessAnalyzerTest.scala | 97 +++++++++++++--------- 3 files changed, 89 insertions(+), 98 deletions(-) (limited to 'src') diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzer.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzer.scala index f6d249db7b..f9ac12eb4d 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzer.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzer.scala @@ -32,57 +32,35 @@ import BytecodeUtils._ * We still have to figure out how to do this exactly in the analyzer framework. */ -/** - * Type to represent nullness of values. - */ -sealed trait Nullness { - final def merge(other: Nullness) = if (this == other) this else Unknown -} -case object NotNull extends Nullness -case object Unknown extends Nullness -case object Null extends Nullness - /** * Represents the nullness state for a local variable or stack value. * - * Note that nullness of primitive values is not tracked, it will be always [[Unknown]]. + * Note that nullness of primitive values is not tracked, it will be always unknown. */ -sealed trait NullnessValue extends Value { - /** - * The nullness of this value. - */ - def nullness: Nullness - - /** - * True if this value is a long or double. The Analyzer framework needs to know - * the size of each value when interpreting instructions, see `Frame.execute`. - */ - def isSize2: Boolean +sealed abstract class NullnessValue(final val isSize2: Boolean) extends Value { /** * The size of the slot described by this value. Cannot be 0 because no values are allocated * for void-typed slots, see NullnessInterpreter.newValue. **/ def getSize: Int = if (isSize2) 2 else 1 - def merge(other: NullnessValue) = NullnessValue(nullness merge other.nullness, isSize2) + def merge(other: NullnessValue) = { + if (this eq other) this + else if (this eq UnknownValue2) this // the only possible value of size two + else UnknownValue1 + } + + final override def equals(other: Any) = this eq other.asInstanceOf[Object] } -object NullValue extends NullnessValue { def nullness = Null; def isSize2 = false; override def toString = "Null" } -object UnknownValue1 extends NullnessValue { def nullness = Unknown; def isSize2 = false; override def toString = "Unknown1" } -object UnknownValue2 extends NullnessValue { def nullness = Unknown; def isSize2 = true; override def toString = "Unknown2" } -object NotNullValue extends NullnessValue { def nullness = NotNull; def isSize2 = false; override def toString = "NotNull" } +object NullValue extends NullnessValue(isSize2 = false) { override def toString = "Null" } +object UnknownValue1 extends NullnessValue(isSize2 = false) { override def toString = "Unknown1" } +object UnknownValue2 extends NullnessValue(isSize2 = true ) { override def toString = "Unknown2" } +object NotNullValue extends NullnessValue(isSize2 = false) { override def toString = "NotNull" } object NullnessValue { - def apply(nullness: Nullness, isSize2: Boolean): NullnessValue = { - if (nullness == Null) NullValue - else if (nullness == NotNull) NotNullValue - else if (isSize2) UnknownValue2 - else UnknownValue1 - } - - def apply(nullness: Nullness, insn: AbstractInsnNode): NullnessValue = { - apply(nullness, isSize2 = BytecodeUtils.instructionResultSize(insn) == 2) - } + def unknown(isSize2: Boolean) = if (isSize2) UnknownValue2 else UnknownValue1 + def unknown(insn: AbstractInsnNode) = if (BytecodeUtils.instructionResultSize(insn) == 2) UnknownValue2 else UnknownValue1 } final class NullnessInterpreter extends Interpreter[NullnessValue](Opcodes.ASM5) { @@ -97,29 +75,25 @@ final class NullnessInterpreter extends Interpreter[NullnessValue](Opcodes.ASM5) // (2) `tp` may also be `null`. When creating the initial frame, the analyzer invokes // `newValue(null)` for each local variable. We have to return a value of size 1. if (tp == Type.VOID_TYPE) null // (1) - else NullnessValue(Unknown, isSize2 = tp != null /*(2)*/ && tp.getSize == 2 ) + else NullnessValue.unknown(isSize2 = tp != null /*(2)*/ && tp.getSize == 2 ) } override def newParameterValue(isInstanceMethod: Boolean, local: Int, tp: Type): NullnessValue = { // For instance methods, the `this` parameter is known to be not null. - if (isInstanceMethod && local == 0) NullnessValue(NotNull, isSize2 = false) + if (isInstanceMethod && local == 0) NotNullValue else super.newParameterValue(isInstanceMethod, local, tp) } - def newOperation(insn: AbstractInsnNode): NullnessValue = { - val nullness = (insn.getOpcode: @switch) match { - case Opcodes.ACONST_NULL => Null - - case Opcodes.LDC => insn.asInstanceOf[LdcInsnNode].cst match { - case _: String | _: Type => NotNull - case _ => Unknown - } + def newOperation(insn: AbstractInsnNode): NullnessValue = (insn.getOpcode: @switch) match { + case Opcodes.ACONST_NULL => NullValue - case _ => Unknown + case Opcodes.LDC => insn.asInstanceOf[LdcInsnNode].cst match { + case _: String | _: Type => NotNullValue + case _ => NullnessValue.unknown(insn) } // for Opcodes.NEW, we use Unknown. The value will become NotNull after the constructor call. - NullnessValue(nullness, insn) + case _ => NullnessValue.unknown(insn) } def copyOperation(insn: AbstractInsnNode, value: NullnessValue): NullnessValue = value @@ -128,26 +102,24 @@ final class NullnessInterpreter extends Interpreter[NullnessValue](Opcodes.ASM5) case Opcodes.CHECKCAST => value case Opcodes.NEWARRAY | - Opcodes.ANEWARRAY => NullnessValue(NotNull, isSize2 = false) + Opcodes.ANEWARRAY => NotNullValue - case _ => NullnessValue(Unknown, insn) + case _ => NullnessValue.unknown(insn) } def binaryOperation(insn: AbstractInsnNode, value1: NullnessValue, value2: NullnessValue): NullnessValue = { - NullnessValue(Unknown, insn) + NullnessValue.unknown(insn) } - def ternaryOperation(insn: AbstractInsnNode, value1: NullnessValue, value2: NullnessValue, value3: NullnessValue): NullnessValue = { - NullnessValue(Unknown, isSize2 = false) - } + def ternaryOperation(insn: AbstractInsnNode, value1: NullnessValue, value2: NullnessValue, value3: NullnessValue): NullnessValue = UnknownValue1 def naryOperation(insn: AbstractInsnNode, values: util.List[_ <: NullnessValue]): NullnessValue = (insn.getOpcode: @switch) match { case Opcodes.MULTIANEWARRAY => - NullnessValue(NotNull, isSize2 = false) + NotNullValue case _ => // TODO: use a list of methods that are known to return non-null values - NullnessValue(Unknown, insn) + NullnessValue.unknown(insn) } def returnOperation(insn: AbstractInsnNode, value: NullnessValue, expected: NullnessValue): Unit = () diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala index 5eb4d033e6..87b51c55b4 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala @@ -16,7 +16,7 @@ import scala.collection.{concurrent, mutable} import scala.collection.convert.decorateAsScala._ import scala.tools.nsc.backend.jvm.BTypes.InternalName import scala.tools.nsc.backend.jvm.BackendReporting._ -import scala.tools.nsc.backend.jvm.analysis.{ParameterProducer, ProdConsAnalyzer, NotNull, NullnessAnalyzer} +import scala.tools.nsc.backend.jvm.analysis._ import ByteCodeRepository.{Source, CompilationUnit} import BytecodeUtils._ @@ -106,7 +106,7 @@ class CallGraph[BT <: BTypes](val btypes: BT) { def receiverNotNullByAnalysis(call: MethodInsnNode, numArgs: Int) = analyzer match { case nullnessAnalyzer: NullnessAnalyzer => val frame = nullnessAnalyzer.frameAt(call, methodNode) - frame.getStack(frame.getStackSize - 1 - numArgs).nullness == NotNull + frame.getStack(frame.getStackSize - 1 - numArgs) eq NotNullValue case _ => false } diff --git a/test/junit/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzerTest.scala b/test/junit/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzerTest.scala index 94e776aadb..9cf6e366d2 100644 --- a/test/junit/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzerTest.scala +++ b/test/junit/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzerTest.scala @@ -38,9 +38,9 @@ class NullnessAnalyzerTest extends ClearAfterClass { nullnessAnalyzer } - def testNullness(analyzer: NullnessAnalyzer, method: MethodNode, query: String, index: Int, nullness: Nullness): Unit = { + def testNullness(analyzer: NullnessAnalyzer, method: MethodNode, query: String, index: Int, nullness: NullnessValue): Unit = { for (i <- findInstr(method, query)) { - val r = analyzer.frameAt(i, method).getValue(index).nullness + val r = analyzer.frameAt(i, method).getValue(index) assertTrue(s"Expected: $nullness, found: $r. At instr ${textify(i)}", nullness == r) } } @@ -77,6 +77,7 @@ class NullnessAnalyzerTest extends ClearAfterClass { |INVOKEVIRTUAL java/lang/Object.toString ()Ljava/lang/String;: 0: NotNull, 1: NotNull | ARETURN: 0: NotNull, 1: Unknown1 | L0: null""".stripMargin +// println(showAllNullnessFrames(newNullnessAnalyzer(m), m)) assertEquals(showAllNullnessFrames(newNullnessAnalyzer(m), m), res) } @@ -84,15 +85,15 @@ class NullnessAnalyzerTest extends ClearAfterClass { def thisNonNull(): Unit = { val List(m) = compileMethods(noOptCompiler)("def f = this.toString") val a = newNullnessAnalyzer(m) - testNullness(a, m, "ALOAD 0", 0, NotNull) + testNullness(a, m, "ALOAD 0", 0, NotNullValue) } @Test def instanceMethodCall(): Unit = { val List(m) = compileMethods(noOptCompiler)("def f(a: String) = a.trim") val a = newNullnessAnalyzer(m) - testNullness(a, m, "INVOKEVIRTUAL java/lang/String.trim", 1, Unknown) - testNullness(a, m, "ARETURN", 1, NotNull) + testNullness(a, m, "INVOKEVIRTUAL java/lang/String.trim", 1, UnknownValue1) + testNullness(a, m, "ARETURN", 1, NotNullValue) } @Test @@ -110,13 +111,13 @@ class NullnessAnalyzerTest extends ClearAfterClass { // ARETURN: 0: NotNull, 1: NotNull, 2: Unknown for ((insn, index, nullness) <- List( - ("+NEW", 2, Unknown), // new value at slot 2 on the stack - ("+DUP", 3, Unknown), - ("+INVOKESPECIAL java/lang/Object", 2, NotNull), // after calling the initializer on 3, the value at 2 becomes NotNull - ("ASTORE 1", 1, Unknown), // before the ASTORE 1, nullness of the value in local 1 is Unknown - ("+ASTORE 1", 1, NotNull), // after storing the value at 2 in local 1, the local 1 is NotNull - ("+ALOAD 1", 2, NotNull), // loading the value 1 puts a NotNull value on the stack (at 2) - ("+INVOKEVIRTUAL java/lang/Object.toString", 2, Unknown) // nullness of value returned by `toString` is Unknown + ("+NEW", 2, UnknownValue1), // new value at slot 2 on the stack + ("+DUP", 3, UnknownValue1), + ("+INVOKESPECIAL java/lang/Object", 2, NotNullValue), // after calling the initializer on 3, the value at 2 becomes NotNull + ("ASTORE 1", 1, UnknownValue1), // before the ASTORE 1, nullness of the value in local 1 is Unknown + ("+ASTORE 1", 1, NotNullValue), // after storing the value at 2 in local 1, the local 1 is NotNull + ("+ALOAD 1", 2, NotNullValue), // loading the value 1 puts a NotNull value on the stack (at 2) + ("+INVOKEVIRTUAL java/lang/Object.toString", 2, UnknownValue1) // nullness of value returned by `toString` is Unknown )) testNullness(a, m, insn, index, nullness) } @@ -125,9 +126,9 @@ class NullnessAnalyzerTest extends ClearAfterClass { val List(m) = compileMethods(noOptCompiler)("def f = { var a: Object = null; a }") val a = newNullnessAnalyzer(m) for ((insn, index, nullness) <- List( - ("+ACONST_NULL", 2, Null), - ("+ASTORE 1", 1, Null), - ("+ALOAD 1", 2, Null) + ("+ACONST_NULL", 2, NullValue), + ("+ASTORE 1", 1, NullValue), + ("+ALOAD 1", 2, NullValue) )) testNullness(a, m, insn, index, nullness) } @@ -135,15 +136,33 @@ class NullnessAnalyzerTest extends ClearAfterClass { def stringLiteralsNotNull(): Unit = { val List(m) = compileMethods(noOptCompiler)("""def f = { val a = "hi"; a.trim }""") val a = newNullnessAnalyzer(m) - testNullness(a, m, "+ASTORE 1", 1, NotNull) + testNullness(a, m, "+ASTORE 1", 1, NotNullValue) } @Test def newArraynotNull() { val List(m) = compileMethods(noOptCompiler)("def f = { val a = new Array[Int](2); a(0) }") val a = newNullnessAnalyzer(m) - testNullness(a, m, "+NEWARRAY T_INT", 2, NotNull) // new array on stack - testNullness(a, m, "+ASTORE 1", 1, NotNull) // local var (a) + testNullness(a, m, "+NEWARRAY T_INT", 2, NotNullValue) // new array on stack + testNullness(a, m, "+ASTORE 1", 1, NotNullValue) // local var (a) + } + + @Test + def mergeNullNotNull(): Unit = { + val code = + """def f(o: Object) = { + | var a: Object = o + | var c: Object = null + | if ("".trim eq "-") { + | c = o + | } + | a.toString + |} + """.stripMargin + val List(m) = compileMethods(noOptCompiler)(code) + val a = newNullnessAnalyzer(m) + val toSt = "+INVOKEVIRTUAL java/lang/Object.toString" + testNullness(a, m, toSt, 3, UnknownValue1) } @Test @@ -173,22 +192,22 @@ class NullnessAnalyzerTest extends ClearAfterClass { val toSt = "INVOKEVIRTUAL java/lang/Object.toString" val end = s"+$toSt" for ((insn, index, nullness) <- List( - (trim, 0, NotNull), // this - (trim, 1, Unknown), // parameter o - (trim, 2, Unknown), // a - (trim, 3, Null), // b - (trim, 4, Null), // c - (trim, 5, Unknown), // d - - (toSt, 2, Unknown), // a, still the same - (toSt, 3, Unknown), // b, was re-assinged in both branches to Unknown - (toSt, 4, Unknown), // c, was re-assigned in one branch to Unknown - (toSt, 5, Null), // d, was assigned to null in both branches - - (end, 2, NotNull), // a, NotNull (alias of b) - (end, 3, NotNull), // b, receiver of toString - (end, 4, Unknown), // c, no change (not an alias of b) - (end, 5, Null) // d, no change + (trim, 0, NotNullValue), // this + (trim, 1, UnknownValue1), // parameter o + (trim, 2, UnknownValue1), // a + (trim, 3, NullValue), // b + (trim, 4, NullValue), // c + (trim, 5, UnknownValue1), // d + + (toSt, 2, UnknownValue1), // a, still the same + (toSt, 3, UnknownValue1), // b, was re-assinged in both branches to Unknown + (toSt, 4, UnknownValue1), // c, was re-assigned in one branch to Unknown + (toSt, 5, NullValue), // d, was assigned to null in both branches + + (end, 2, NotNullValue), // a, NotNull (alias of b) + (end, 3, NotNullValue), // b, receiver of toString + (end, 4, UnknownValue1), // c, no change (not an alias of b) + (end, 5, NullValue) // d, no change )) testNullness(a, m, insn, index, nullness) } @@ -210,11 +229,11 @@ class NullnessAnalyzerTest extends ClearAfterClass { val trim = "INVOKEVIRTUAL java/lang/String.trim" for ((insn, index, nullness) <- List( - (instof, 1, Unknown), // a after INSTANCEOF - (instof, 2, Unknown), // x after INSTANCEOF - (tost, 1, NotNull), - (tost, 2, NotNull), - (trim, 3, NotNull) // receiver at `trim` + (instof, 1, UnknownValue1), // a after INSTANCEOF + (instof, 2, UnknownValue1), // x after INSTANCEOF + (tost, 1, NotNullValue), + (tost, 2, NotNullValue), + (trim, 3, NotNullValue) // receiver at `trim` )) testNullness(a, m, insn, index, nullness) } } -- cgit v1.2.3 From 7b52a12b0a4b5c7e7dcc439714baf167cf2f6e84 Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Wed, 9 Sep 2015 20:11:27 +0200 Subject: Minor fixes in the optimizer Fixes the variable indices in the local variable table (debug info) for inlined locals. After re-writing a closure invocation to the body method, mark the callsite method as potentially having dead code. The body method may return Nothing$, creating new unreachable code in the callsite method. --- .../scala/tools/nsc/backend/jvm/opt/BytecodeUtils.scala | 4 ++-- .../scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala | 10 +++++++--- src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala | 2 +- src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala | 1 - 4 files changed, 10 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/BytecodeUtils.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/BytecodeUtils.scala index df8dcc690a..ede572dfb4 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/BytecodeUtils.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/BytecodeUtils.scala @@ -308,14 +308,14 @@ object BytecodeUtils { * Clone the local variable descriptors of `methodNode` and map their `start` and `end` labels * according to the `labelMap`. */ - def cloneLocalVariableNodes(methodNode: MethodNode, labelMap: Map[LabelNode, LabelNode], prefix: String): List[LocalVariableNode] = { + def cloneLocalVariableNodes(methodNode: MethodNode, labelMap: Map[LabelNode, LabelNode], prefix: String, shift: Int): List[LocalVariableNode] = { methodNode.localVariables.iterator().asScala.map(localVariable => new LocalVariableNode( prefix + localVariable.name, localVariable.desc, localVariable.signature, labelMap(localVariable.start), labelMap(localVariable.end), - localVariable.index + localVariable.index + shift )).toList } diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala index 30b7f2edad..1a1a6942dc 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala @@ -78,10 +78,10 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { closureInstantiations.iterator.flatMap({ case (methodNode, closureInits) => // A lazy val to ensure the analysis only runs if necessary (the value is passed by name to `closureCallsites`) - lazy val prodCons = new ProdConsAnalyzer(methodNode, closureInits.valuesIterator.next.ownerClass.internalName) + lazy val prodCons = new ProdConsAnalyzer(methodNode, closureInits.valuesIterator.next().ownerClass.internalName) val sortedInits = immutable.TreeSet.empty ++ closureInits.values - sortedInits.iterator.map(init => (init, closureCallsites(init, prodCons))) - }).toList // mapping to a list (not a map) to keep the sorting of closureInstantiationsByMethod + sortedInits.iterator.map(init => (init, closureCallsites(init, prodCons))).filter(_._2.nonEmpty) + }).toList // mapping to a list (not a map) to keep the sorting } // Rewrite all closure callsites (or issue inliner warnings for those that cannot be rewritten) @@ -268,6 +268,10 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { // Explanation: If the lambda body method is non-static, the receiver is a captured // value. It can only be captured within some instance method, so we know it's non-null. callGraph.addCallsite(bodyMethodCallsite) + + // Rewriting a closure invocation may render code unreachable. For example, the body method of + // (x: T) => ??? has return type Nothing$, and an ATHROW is added (see fixLoadedNothingOrNullValue). + unreachableCodeEliminated -= ownerMethod } /** diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala index 1550f942c7..8dff7ef446 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala @@ -406,7 +406,7 @@ class Inliner[BT <: BTypes](val btypes: BT) { callsiteMethod.instructions.insert(callsiteInstruction, clonedInstructions) callsiteMethod.instructions.remove(callsiteInstruction) - callsiteMethod.localVariables.addAll(cloneLocalVariableNodes(callee, labelsMap, callee.name + "_").asJava) + callsiteMethod.localVariables.addAll(cloneLocalVariableNodes(callee, labelsMap, callee.name + "_", localVarShift).asJava) callsiteMethod.tryCatchBlocks.addAll(cloneTryCatchBlockNodes(callee, labelsMap).asJava) callsiteMethod.maxLocals += returnType.getSize + callee.maxLocals diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala index 4132710a96..f5e6653344 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala @@ -196,7 +196,6 @@ object LocalOptImpls { a.analyze(ownerClassName, method) val frames = a.getFrames - val initialSize = method.instructions.size var i = 0 var liveLabels = Set.empty[LabelNode] var removedInstructions = Set.empty[AbstractInsnNode] -- cgit v1.2.3 From 7877ccda89a74c942107f955f3a217d9dab35a8e Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Wed, 16 Sep 2015 14:27:26 +0200 Subject: Run computeMaxLocalsMaxStack less often Introduce a cache to remember which methods have maxLocals and maxStack already computed. Before we were computing these values on every run of eliminateUnreachableCode. Also update the implementation of eliminateUnreachableCode to keep correct max values. --- .../scala/tools/nsc/backend/jvm/BTypes.scala | 13 +- .../tools/nsc/backend/jvm/BTypesFromSymbols.scala | 3 + .../tools/nsc/backend/jvm/analysis/Analyzers.scala | 29 ++ .../backend/jvm/analysis/ProdConsAnalyzer.scala | 483 --------------------- .../jvm/analysis/ProdConsAnalyzerImpl.scala | 462 ++++++++++++++++++++ .../tools/nsc/backend/jvm/analysis/package.scala | 47 +- .../tools/nsc/backend/jvm/opt/BytecodeUtils.scala | 37 +- .../tools/nsc/backend/jvm/opt/CallGraph.scala | 13 +- .../nsc/backend/jvm/opt/ClosureOptimizer.scala | 5 +- .../scala/tools/nsc/backend/jvm/opt/Inliner.scala | 23 +- .../scala/tools/nsc/backend/jvm/opt/LocalOpt.scala | 82 +++- .../jvm/analysis/NullnessAnalyzerTest.scala | 15 +- .../jvm/analysis/ProdConsAnalyzerTest.scala | 1 + .../nsc/backend/jvm/opt/ClosureOptimizerTest.scala | 1 - .../nsc/backend/jvm/opt/InlineWarningTest.scala | 1 - .../tools/nsc/backend/jvm/opt/InlinerTest.scala | 2 +- .../nsc/backend/jvm/opt/UnreachableCodeTest.scala | 2 +- 17 files changed, 651 insertions(+), 568 deletions(-) create mode 100644 src/compiler/scala/tools/nsc/backend/jvm/analysis/Analyzers.scala delete mode 100644 src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzer.scala create mode 100644 src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzerImpl.scala (limited to 'src') diff --git a/src/compiler/scala/tools/nsc/backend/jvm/BTypes.scala b/src/compiler/scala/tools/nsc/backend/jvm/BTypes.scala index 6bc9f76e1a..c8ba0d9dbc 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/BTypes.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/BTypes.scala @@ -11,9 +11,10 @@ import scala.collection.concurrent.TrieMap import scala.reflect.internal.util.Position import scala.tools.asm import asm.Opcodes -import scala.tools.asm.tree.{MethodNode, MethodInsnNode, InnerClassNode, ClassNode} +import scala.tools.asm.tree._ import scala.tools.nsc.backend.jvm.BTypes.{InlineInfo, MethodInlineInfo} import scala.tools.nsc.backend.jvm.BackendReporting._ +import scala.tools.nsc.backend.jvm.analysis.Analyzers import scala.tools.nsc.backend.jvm.opt._ import scala.collection.convert.decorateAsScala._ import scala.tools.nsc.settings.ScalaSettings @@ -50,6 +51,8 @@ abstract class BTypes { val callGraph: CallGraph[this.type] + val analyzers: Analyzers[this.type] + val backendReporting: BackendReporting // Allows to define per-run caches here and in the CallGraph component, which don't have a global @@ -58,7 +61,6 @@ abstract class BTypes { // Allows access to the compiler settings for backend components that don't have a global in scope def compilerSettings: ScalaSettings - /** * A map from internal names to ClassBTypes. Every ClassBType is added to this map on its * construction. @@ -96,6 +98,13 @@ abstract class BTypes { */ val unreachableCodeEliminated: collection.mutable.Set[MethodNode] = recordPerRunCache(collection.mutable.Set.empty) + /** + * Cache of methods which have correct `maxLocals` / `maxStack` values assigned. This allows + * invoking `computeMaxLocalsMaxStack` whenever running an analyzer but performing the actual + * computation only when necessary. + */ + val maxLocalsMaxStackComputed: collection.mutable.Set[MethodNode] = recordPerRunCache(collection.mutable.Set.empty) + /** * Obtain the BType for a type descriptor or internal name. For class descriptors, the ClassBType * is constructed by parsing the corresponding classfile. diff --git a/src/compiler/scala/tools/nsc/backend/jvm/BTypesFromSymbols.scala b/src/compiler/scala/tools/nsc/backend/jvm/BTypesFromSymbols.scala index dc87ed631d..8cccc50c69 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/BTypesFromSymbols.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/BTypesFromSymbols.scala @@ -7,6 +7,7 @@ package scala.tools.nsc package backend.jvm import scala.tools.asm +import scala.tools.nsc.backend.jvm.analysis.Analyzers import scala.tools.nsc.backend.jvm.opt._ import scala.tools.nsc.backend.jvm.BTypes._ import BackendReporting._ @@ -48,6 +49,8 @@ class BTypesFromSymbols[G <: Global](val global: G) extends BTypes { val callGraph: CallGraph[this.type] = new CallGraph(this) + val analyzers: Analyzers[this.type] = new Analyzers(this) + val backendReporting: BackendReporting = new BackendReportingImpl(global) final def initializeCoreBTypes(): Unit = { diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/Analyzers.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/Analyzers.scala new file mode 100644 index 0000000000..3734c63e93 --- /dev/null +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/Analyzers.scala @@ -0,0 +1,29 @@ +package scala.tools.nsc +package backend.jvm +package analysis + +import scala.tools.asm.tree.{AbstractInsnNode, MethodNode} +import scala.tools.asm.tree.analysis.{Frame, BasicInterpreter, Analyzer, Value} +import scala.tools.nsc.backend.jvm.BTypes._ +import scala.tools.nsc.backend.jvm.opt.BytecodeUtils._ + +/** + * This component hosts tools for running ASM analyzers that require access to a `BTypes` instance. + * In particular, the AsmAnalyzer class runs `computeMaxLocalsMaxStack` on the methodNode to be + * analyzed. This method in turn lives inside the BTypes assembly because it queries the per-run + * cache `maxLocalsMaxStackComputed` defined in there. + */ +class Analyzers[BT <: BTypes](val btypes: BT) { + import btypes._ + + /** + * A wrapper to make ASM's Analyzer a bit easier to use. + */ + class AsmAnalyzer[V <: Value](methodNode: MethodNode, classInternalName: InternalName, val analyzer: Analyzer[V] = new Analyzer(new BasicInterpreter)) { + localOpt.computeMaxLocalsMaxStack(methodNode) + analyzer.analyze(classInternalName, methodNode) + def frameAt(instruction: AbstractInsnNode): Frame[V] = analyzer.frameAt(instruction, methodNode) + } + + class ProdConsAnalyzer(val methodNode: MethodNode, classInternalName: InternalName) extends AsmAnalyzer(methodNode, classInternalName, new Analyzer(new InitialProducerSourceInterpreter)) with ProdConsAnalyzerImpl +} diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzer.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzer.scala deleted file mode 100644 index f2d8dc910a..0000000000 --- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzer.scala +++ /dev/null @@ -1,483 +0,0 @@ -/* NSC -- new Scala compiler - * Copyright 2005-2015 LAMP/EPFL - * @author Martin Odersky - */ - -package scala.tools.nsc -package backend.jvm -package analysis - -import java.util - -import scala.annotation.switch -import scala.collection.mutable -import scala.tools.asm.{Type, MethodVisitor} -import scala.tools.asm.Opcodes._ -import scala.tools.asm.tree._ -import scala.tools.asm.tree.analysis._ -import scala.tools.nsc.backend.jvm.BTypes.InternalName - -import opt.BytecodeUtils._ - -import scala.collection.convert.decorateAsScala._ - -/** - * This class provides additional queries over ASM's built-in `SourceValue` analysis. - * - * The analysis computes for each value in a frame a set of source instructions, which are the - * potential producers. Most instructions produce either nothing or a stack value. For example, - * a `LOAD` instruction is the producer of the value pushed onto the stack. The exception are - * `STORE` instructions, which produce a new value for a local variable slot, so they are used - * as producers for the value they stored. - * - * Note that pseudo-instructions are used as initial producers for parameters and local variables. - * See the documentation on class InitialProducer. - * - * This class implements the following queries over the data computed by the SourceValue analysis: - * - * - producersForValueAt(insn, slot) - * - consumersOfValueAt(insn, slot) - * - * - producersForInputsOf(insn) - * - consumersOfOutputsFrom(insn) - * - * - initialProducersForValueAt(insn, slot) - * - ultimateConsumersOfValueAt(insn, slot) - * - * - initialProducersForInputsOf(insn) - * - ultimateConsumersOfOutputsFrom(insn) - * - * The following operations are considered as copying operations: - * - xLOAD, xSTORE - * - DUP, DUP2, DUP_X1, DUP_X2, DUP2_X1, DUP2_X2 - * - SWAP - * - CHECKCAST - * - * If ever needed, we could introduce a mode where primitive conversions (l2i) are considered as - * copying operations. - * - * Note on performance: thee data flow analysis (SourceValue / SourceInterpreter, provided by ASM) - * is roughly 2-3x slower than a simple analysis (like BasicValue). The reason is that the merge - * function (merging producer sets) is more complex than merging simple basic values. - * See also the doc comment in the package object `analysis`. - */ -class ProdConsAnalyzer(methodNode: MethodNode, classInternalName: InternalName) { - - /* Timers for benchmarking ProdCons - import scala.reflect.internal.util.Statistics._ - import ProdConsAnalyzer._ - val analyzerTimer = newSubTimer(classInternalName + "#" + methodNode.name + " - analysis", prodConsAnalyzerTimer) - val consumersTimer = newSubTimer(classInternalName + "#" + methodNode.name + " - consumers", prodConsAnalyzerTimer) - */ - - val analyzer = new Analyzer(new InitialProducerSourceInterpreter) - -// val start = analyzerTimer.start() - analyzer.analyze(classInternalName, methodNode) -// analyzerTimer.stop(start) -// println(analyzerTimer.line) - - def frameAt(insn: AbstractInsnNode) = analyzer.frameAt(insn, methodNode) - - /** - * Returns the potential producer instructions of a (local or stack) value in the frame of `insn`. - * This method simply returns the producer information computed by the SourceValue analysis. - */ - def producersForValueAt(insn: AbstractInsnNode, slot: Int): Set[AbstractInsnNode] = { - frameAt(insn).getValue(slot).insns.asScala.toSet - } - - /** - * Returns the potential consumer instructions of a (local or stack) value in the frame of `insn`. - * This is the counterpart of `producersForValueAt`. - */ - def consumersOfValueAt(insn: AbstractInsnNode, slot: Int): Set[AbstractInsnNode] = { - producersForValueAt(insn, slot).flatMap(prod => { - val outputNumber = outputValueSlots(prod).indexOf(slot) - _consumersOfOutputsFrom.get(prod).map(v => { - v(outputNumber) - }).getOrElse(Set.empty) - }) - } - - /** - * Returns the potential producer instructions of any of the values consumed by `insn`. - */ - def producersForInputsOf(insn: AbstractInsnNode): Set[AbstractInsnNode] = { - inputValues(insn).iterator.flatMap(v => v.insns.asScala).toSet - } - - def consumersOfOutputsFrom(insn: AbstractInsnNode): Set[AbstractInsnNode] = - _consumersOfOutputsFrom.get(insn).map(v => v.indices.flatMap(v.apply)(collection.breakOut): Set[AbstractInsnNode]).getOrElse(Set.empty) - - /** - * Returns the potential initial producer instructions of a value in the frame of `insn`. - * - * Unlike `producersForValueAt`, producers are tracked through copying instructions such as STORE - * and LOAD. If the producer of the value is a LOAD, then the producers of the stored value(s) are - * returned instead. - */ - def initialProducersForValueAt(insn: AbstractInsnNode, slot: Int): Set[AbstractInsnNode] = { - def initialProducers(insn: AbstractInsnNode, producedSlot: Int): Set[AbstractInsnNode] = { - if (isCopyOperation(insn)) { - val key = (insn, producedSlot) - _initialProducersCache.getOrElseUpdate(key, { - // prevent infinite recursion if an instruction is its own producer or consumer - // see cyclicProdCons in ProdConsAnalyzerTest - _initialProducersCache(key) = Set.empty - val (sourceValue, sourceValueSlot) = copyOperationSourceValue(insn, producedSlot) - sourceValue.insns.iterator.asScala.flatMap(initialProducers(_, sourceValueSlot)).toSet - }) - } else { - Set(insn) - } - } - producersForValueAt(insn, slot).flatMap(initialProducers(_, slot)) - } - - /** - * Returns the potential ultimate consumers of a value in the frame of `insn`. Consumers are - * tracked through copying operations such as SOTRE and LOAD. - */ - def ultimateConsumersOfValueAt(insn: AbstractInsnNode, slot: Int): Set[AbstractInsnNode] = { - def ultimateConsumers(insn: AbstractInsnNode, consumedSlot: Int): Set[AbstractInsnNode] = { - if (isCopyOperation(insn)) { - val key = (insn, consumedSlot) - _ultimateConsumersCache.getOrElseUpdate(key, { - // prevent infinite recursion if an instruction is its own producer or consumer - // see cyclicProdCons in ProdConsAnalyzerTest - _ultimateConsumersCache(key) = Set.empty - for { - producedSlot <- copyOperationProducedValueSlots(insn, consumedSlot) - consumer <- consumersOfValueAt(insn.getNext, producedSlot) - ultimateConsumer <- ultimateConsumers(consumer, producedSlot) - } yield ultimateConsumer - }) - } else { - Set(insn) - } - } - consumersOfValueAt(insn, slot).flatMap(ultimateConsumers(_, slot)) - } - - def initialProducersForInputsOf(insn: AbstractInsnNode): Set[AbstractInsnNode] = { - inputValueSlots(insn).flatMap(slot => initialProducersForValueAt(insn, slot)).toSet - } - - def ultimateConsumersOfOutputsFrom(insn: AbstractInsnNode): Set[AbstractInsnNode] = { - lazy val next = insn.getNext - outputValueSlots(insn).flatMap(slot => ultimateConsumersOfValueAt(next, slot)).toSet - } - - private def isCopyOperation(insn: AbstractInsnNode): Boolean = { - isVarInstruction(insn) || { - (insn.getOpcode: @switch) match { - case DUP | DUP_X1 | DUP_X2 | DUP2 | DUP2_X1 | DUP2_X2 | SWAP | CHECKCAST => true - case _ => false - } - } - } - - /** - * Returns the value and its frame slot that `copyOp` copies into `producedSlot`. - * - * Example: - * - copyOp = DUP_X1, assume it produces slots 2,3,4 - * - producedSlot = 3 - * - the result is the value at slot 2 in the frame of `copyOp` - */ - private def copyOperationSourceValue(copyOp: AbstractInsnNode, producedSlot: Int): (SourceValue, Int) = { - val frame = frameAt(copyOp) - - // Index of the produced value. Example: DUP_X1 produces 3 values, so producedIndex is 0, 1 or 2, - // where 0 corresponds to the lowest value on the stack. - def producedIndex(numConsumed: Int) = { - val numUsedSlotsBeforeCopy = frame.stackTop + 1 - producedSlot - (numUsedSlotsBeforeCopy - numConsumed) - } - - def stackValue(n: Int) = (frame.peekStack(n), frame.stackTop - n) - - def dupX1Case = (producedIndex(2): @switch) match { - case 0 | 2 => stackValue(0) - case 1 => stackValue(1) - } - - // Form 1 of dup_x2 - def dupX2Case = (producedIndex(3): @switch) match { - case 0 | 3 => stackValue(0) - case 1 => stackValue(2) - case 2 => stackValue(1) - } - - // Form 1 of dup2_x1 - def dup2X1Case = (producedIndex(3): @switch) match { - case 0 | 3 => stackValue(1) - case 1 | 4 => stackValue(0) - case 2 => stackValue(2) - } - - if (isLoad(copyOp)) { - val slot = copyOp.asInstanceOf[VarInsnNode].`var` - (frame.getLocal(slot), slot) - } else if (isStore(copyOp)) { - stackValue(0) - } else (copyOp.getOpcode: @switch) match { - case DUP => - stackValue(0) // the current stack top is the source of both produced values - - case DUP_X1 => - dupX1Case - - case DUP_X2 => - if (frame.peekStack(1).getSize == 2) dupX1Case - else dupX2Case - - case DUP2 => - if (frame.peekStack(0).getSize == 2) stackValue(0) - else { - (producedIndex(2): @switch) match { - case 0 | 2 => stackValue(1) - case 1 | 3 => stackValue(0) - } - } - - case DUP2_X1 => - if (frame.peekStack(0).getSize == 2) dupX1Case - else dup2X1Case - - case DUP2_X2 => - val v1isSize2 = frame.peekStack(0).getSize == 2 - if (v1isSize2) { - val v2isSize2 = frame.peekStack(1).getSize == 2 - if (v2isSize2) dupX1Case // Form 4 - else dupX2Case // Form 2 - } else { - val v3isSize2 = frame.peekStack(2).getSize == 2 - if (v3isSize2) dup2X1Case // Form 3 - else { - // Form 1 - (producedIndex(4): @switch) match { - case 0 | 4 => stackValue(1) - case 1 | 5 => stackValue(0) - case 2 => stackValue(3) - case 3 => stackValue(2) - } - } - } - - case SWAP => - if (producedIndex(2) == 0) stackValue(0) - else stackValue(1) - - case CHECKCAST => - stackValue(0) - } - } - - /** - * Returns the value slots into which `copyOp` copies the value at `consumedSlot`. - * - * Example: - * - copyOp = DUP_X1, assume it consumes slots 2,3 and produces 2,3,4 - * - if consumedSlot == 2, the result is Set(3) - * - if consumedSlot == 3, the result is Set(2, 4) - */ - private def copyOperationProducedValueSlots(copyOp: AbstractInsnNode, consumedSlot: Int): Set[Int] = { - if (isStore(copyOp)) Set(copyOp.asInstanceOf[VarInsnNode].`var`) - else { - val nextFrame = frameAt(copyOp.getNext) - val top = nextFrame.stackTop - - // Index of the consumed value. Example: DUP_X1 consumes two values, so consumedIndex is - // 0 or 1, where 0 corresponds to the lower value on the stack. - def consumedIndex(numProduced: Int) = { - val numUsedSlotsAfterCopy = top + 1 - consumedSlot - (numUsedSlotsAfterCopy - numProduced) - } - - def dupX1Case = (consumedIndex(3): @switch) match { - case 0 => Set(top - 1) - case 1 => Set(top - 2, top) - } - - def dupX2Case = (consumedIndex(4): @switch) match { - case 0 => Set(top - 2) - case 1 => Set(top - 1) - case 2 => Set(top - 3, top) - } - - def dup2X1Case = (consumedIndex(5): @switch) match { - case 0 => Set(top - 2) - case 1 => Set(top - 4, top - 1) - case 2 => Set(top - 3, top) - } - - if (isLoad(copyOp)) Set(top) - else (copyOp.getOpcode: @switch) match { - case DUP => - Set(top - 1, top) - - case DUP_X1 => - dupX1Case - - case DUP_X2 => - if (nextFrame.peekStack(1).getSize == 2) dupX1Case - else dupX2Case - - case DUP2 => - if (nextFrame.peekStack(0).getSize == 2) Set(top - 1, top) - else (consumedIndex(4): @switch) match { - case 0 => Set(top - 3, top - 1) - case 1 => Set(top - 2, top) - } - - case DUP2_X1 => - if (nextFrame.peekStack(0).getSize == 2) dupX1Case - else dup2X1Case - - case DUP2_X2 => - val v1isSize2 = nextFrame.peekStack(0).getSize == 2 - if (v1isSize2) { - val v2isSize2 = nextFrame.peekStack(1).getSize == 2 - if (v2isSize2) dupX1Case // Form 4 - else dupX2Case // Form 2 - } else { - val v3isSize2 = nextFrame.peekStack(2).getSize == 2 - if (v3isSize2) dup2X1Case // Form 3 - else { - // Form 1 - (consumedIndex(6): @switch) match { - case 0 => Set(top - 3) - case 1 => Set(top - 2) - case 2 => Set(top - 5, top - 1) - case 3 => Set(top - 4, top) - } - } - } - - case SWAP => - if (consumedIndex(2) == 0) Set(top) - else Set(top - 1) - - case CHECKCAST => - Set(top) - } - } - } - - /** Returns the frame values consumed by executing `insn`. */ - private def inputValues(insn: AbstractInsnNode): Seq[SourceValue] = { - lazy val frame = frameAt(insn) - inputValueSlots(insn) map frame.getValue - } - - /** Returns the frame slots holding the values consumed by executing `insn`. */ - private def inputValueSlots(insn: AbstractInsnNode): Seq[Int] = { - if (insn.getOpcode == -1) return Seq.empty - if (isLoad(insn)) { - Seq(insn.asInstanceOf[VarInsnNode].`var`) - } else if (insn.getOpcode == IINC) { - Seq(insn.asInstanceOf[IincInsnNode].`var`) - } else { - val frame = frameAt(insn) - val stackEffect = InstructionStackEffect(insn, frame) - val stackSize = frame.getLocals + frame.getStackSize - (stackSize - stackEffect._1) until stackSize - } - } - - /** Returns the frame slots holding the values produced by executing `insn`. */ - private def outputValueSlots(insn: AbstractInsnNode): Seq[Int] = insn match { - case ParameterProducer(local) => Seq(local) - case UninitializedLocalProducer(local) => Seq(local) - case ExceptionProducer(frame) => Seq(frame.stackTop) - case _ => - if (insn.getOpcode == -1) return Seq.empty - if (isStore(insn)) { - Seq(insn.asInstanceOf[VarInsnNode].`var`) - } else if (insn.getOpcode == IINC) { - Seq(insn.asInstanceOf[IincInsnNode].`var`) - } else { - val frame = frameAt(insn) - val stackEffect = InstructionStackEffect(insn, frame) - val nextFrame = frameAt(insn.getNext) - val stackSize = nextFrame.getLocals + nextFrame.getStackSize - (stackSize - stackEffect._2) until stackSize - } - } - - /** For each instruction, a set of potential consumers of the produced values. */ - private lazy val _consumersOfOutputsFrom: Map[AbstractInsnNode, Vector[Set[AbstractInsnNode]]] = { -// val start = consumersTimer.start() - var res = Map.empty[AbstractInsnNode, Vector[Set[AbstractInsnNode]]] - for { - insn <- methodNode.instructions.iterator.asScala - frame = frameAt(insn) - i <- inputValueSlots(insn) - producer <- frame.getValue(i).insns.asScala - } { - val producedSlots = outputValueSlots(producer) - val currentConsumers = res.getOrElse(producer, Vector.fill(producedSlots.size)(Set.empty[AbstractInsnNode])) - val outputIndex = producedSlots.indexOf(i) - res = res.updated(producer, currentConsumers.updated(outputIndex, currentConsumers(outputIndex) + insn)) - } -// consumersTimer.stop(start) -// println(consumersTimer.line) - res - } - - private val _initialProducersCache: mutable.AnyRefMap[(AbstractInsnNode, Int), Set[AbstractInsnNode]] = mutable.AnyRefMap.empty - private val _ultimateConsumersCache: mutable.AnyRefMap[(AbstractInsnNode, Int), Set[AbstractInsnNode]] = mutable.AnyRefMap.empty -} - -object ProdConsAnalyzer { - import scala.reflect.internal.util.Statistics._ - val prodConsAnalyzerTimer = newTimer("Time in ProdConsAnalyzer", "jvm") -} - -/** - * A class for pseudo-instructions representing the initial producers of local values that have - * no producer instruction in the method: - * - parameters, including `this` - * - uninitialized local variables - * - exception values in handlers - * - * The ASM built-in SourceValue analysis yields an empty producers set for such values. This leads - * to ambiguities. Example (in Java one can re-assign parameter): - * - * void foo(int a) { - * if (a == 0) a = 1; - * return a; - * } - * - * In the first frame of the method, the SoruceValue for parameter `a` gives an empty set of - * producer instructions. - * - * In the frame of the `IRETURN` instruction, the SoruceValue for parameter `a` lists a single - * producer instruction: the `ISTORE 1`. This makes it look as if there was a single producer for - * `a`, where in fact it might still hold the parameter's initial value. - */ -abstract class InitialProducer extends AbstractInsnNode(-1) { - override def getType: Int = throw new UnsupportedOperationException - override def clone(labels: util.Map[LabelNode, LabelNode]): AbstractInsnNode = throw new UnsupportedOperationException - override def accept(cv: MethodVisitor): Unit = throw new UnsupportedOperationException -} - -case class ParameterProducer(local: Int) extends InitialProducer -case class UninitializedLocalProducer(local: Int) extends InitialProducer -case class ExceptionProducer[V <: Value](handlerFrame: Frame[V]) extends InitialProducer - -class InitialProducerSourceInterpreter extends SourceInterpreter { - override def newParameterValue(isInstanceMethod: Boolean, local: Int, tp: Type): SourceValue = { - new SourceValue(tp.getSize, ParameterProducer(local)) - } - - override def newEmptyNonParameterLocalValue(local: Int): SourceValue = { - new SourceValue(1, UninitializedLocalProducer(local)) - } - - override def newExceptionValue(tryCatchBlockNode: TryCatchBlockNode, handlerFrame: Frame[_ <: Value], exceptionType: Type): SourceValue = { - new SourceValue(1, ExceptionProducer(handlerFrame)) - } -} \ No newline at end of file diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzerImpl.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzerImpl.scala new file mode 100644 index 0000000000..ce2fe943e4 --- /dev/null +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzerImpl.scala @@ -0,0 +1,462 @@ +/* NSC -- new Scala compiler + * Copyright 2005-2015 LAMP/EPFL + * @author Martin Odersky + */ + +package scala.tools.nsc +package backend.jvm +package analysis + +import java.util + +import scala.annotation.switch +import scala.collection.mutable +import scala.tools.asm.{Type, MethodVisitor} +import scala.tools.asm.Opcodes._ +import scala.tools.asm.tree._ +import scala.tools.asm.tree.analysis._ +import scala.tools.nsc.backend.jvm.BTypes.InternalName + +import opt.BytecodeUtils._ + +import scala.collection.convert.decorateAsScala._ + +/** + * This class provides additional queries over ASM's built-in `SourceValue` analysis. + * + * The analysis computes for each value in a frame a set of source instructions, which are the + * potential producers. Most instructions produce either nothing or a stack value. For example, + * a `LOAD` instruction is the producer of the value pushed onto the stack. The exception are + * `STORE` instructions, which produce a new value for a local variable slot, so they are used + * as producers for the value they stored. + * + * Note that pseudo-instructions are used as initial producers for parameters and local variables. + * See the documentation on class InitialProducer. + * + * This class implements the following queries over the data computed by the SourceValue analysis: + * + * - producersForValueAt(insn, slot) + * - consumersOfValueAt(insn, slot) + * + * - producersForInputsOf(insn) + * - consumersOfOutputsFrom(insn) + * + * - initialProducersForValueAt(insn, slot) + * - ultimateConsumersOfValueAt(insn, slot) + * + * - initialProducersForInputsOf(insn) + * - ultimateConsumersOfOutputsFrom(insn) + * + * The following operations are considered as copying operations: + * - xLOAD, xSTORE + * - DUP, DUP2, DUP_X1, DUP_X2, DUP2_X1, DUP2_X2 + * - SWAP + * - CHECKCAST + * + * If ever needed, we could introduce a mode where primitive conversions (l2i) are considered as + * copying operations. + * + * Note on performance: thee data flow analysis (SourceValue / SourceInterpreter, provided by ASM) + * is roughly 2-3x slower than a simple analysis (like BasicValue). The reason is that the merge + * function (merging producer sets) is more complex than merging simple basic values. + * See also the doc comment in the package object `analysis`. + */ +trait ProdConsAnalyzerImpl { + val methodNode: MethodNode + + def frameAt(insn: AbstractInsnNode): Frame[SourceValue] + + /** + * Returns the potential producer instructions of a (local or stack) value in the frame of `insn`. + * This method simply returns the producer information computed by the SourceValue analysis. + */ + def producersForValueAt(insn: AbstractInsnNode, slot: Int): Set[AbstractInsnNode] = { + frameAt(insn).getValue(slot).insns.asScala.toSet + } + + /** + * Returns the potential consumer instructions of a (local or stack) value in the frame of `insn`. + * This is the counterpart of `producersForValueAt`. + */ + def consumersOfValueAt(insn: AbstractInsnNode, slot: Int): Set[AbstractInsnNode] = { + producersForValueAt(insn, slot).flatMap(prod => { + val outputNumber = outputValueSlots(prod).indexOf(slot) + _consumersOfOutputsFrom.get(prod).map(v => { + v(outputNumber) + }).getOrElse(Set.empty) + }) + } + + /** + * Returns the potential producer instructions of any of the values consumed by `insn`. + */ + def producersForInputsOf(insn: AbstractInsnNode): Set[AbstractInsnNode] = { + inputValues(insn).iterator.flatMap(v => v.insns.asScala).toSet + } + + def consumersOfOutputsFrom(insn: AbstractInsnNode): Set[AbstractInsnNode] = + _consumersOfOutputsFrom.get(insn).map(v => v.indices.flatMap(v.apply)(collection.breakOut): Set[AbstractInsnNode]).getOrElse(Set.empty) + + /** + * Returns the potential initial producer instructions of a value in the frame of `insn`. + * + * Unlike `producersForValueAt`, producers are tracked through copying instructions such as STORE + * and LOAD. If the producer of the value is a LOAD, then the producers of the stored value(s) are + * returned instead. + */ + def initialProducersForValueAt(insn: AbstractInsnNode, slot: Int): Set[AbstractInsnNode] = { + def initialProducers(insn: AbstractInsnNode, producedSlot: Int): Set[AbstractInsnNode] = { + if (isCopyOperation(insn)) { + val key = (insn, producedSlot) + _initialProducersCache.getOrElseUpdate(key, { + // prevent infinite recursion if an instruction is its own producer or consumer + // see cyclicProdCons in ProdConsAnalyzerTest + _initialProducersCache(key) = Set.empty + val (sourceValue, sourceValueSlot) = copyOperationSourceValue(insn, producedSlot) + sourceValue.insns.iterator.asScala.flatMap(initialProducers(_, sourceValueSlot)).toSet + }) + } else { + Set(insn) + } + } + producersForValueAt(insn, slot).flatMap(initialProducers(_, slot)) + } + + /** + * Returns the potential ultimate consumers of a value in the frame of `insn`. Consumers are + * tracked through copying operations such as SOTRE and LOAD. + */ + def ultimateConsumersOfValueAt(insn: AbstractInsnNode, slot: Int): Set[AbstractInsnNode] = { + def ultimateConsumers(insn: AbstractInsnNode, consumedSlot: Int): Set[AbstractInsnNode] = { + if (isCopyOperation(insn)) { + val key = (insn, consumedSlot) + _ultimateConsumersCache.getOrElseUpdate(key, { + // prevent infinite recursion if an instruction is its own producer or consumer + // see cyclicProdCons in ProdConsAnalyzerTest + _ultimateConsumersCache(key) = Set.empty + for { + producedSlot <- copyOperationProducedValueSlots(insn, consumedSlot) + consumer <- consumersOfValueAt(insn.getNext, producedSlot) + ultimateConsumer <- ultimateConsumers(consumer, producedSlot) + } yield ultimateConsumer + }) + } else { + Set(insn) + } + } + consumersOfValueAt(insn, slot).flatMap(ultimateConsumers(_, slot)) + } + + def initialProducersForInputsOf(insn: AbstractInsnNode): Set[AbstractInsnNode] = { + inputValueSlots(insn).flatMap(slot => initialProducersForValueAt(insn, slot)).toSet + } + + def ultimateConsumersOfOutputsFrom(insn: AbstractInsnNode): Set[AbstractInsnNode] = { + lazy val next = insn.getNext + outputValueSlots(insn).flatMap(slot => ultimateConsumersOfValueAt(next, slot)).toSet + } + + private def isCopyOperation(insn: AbstractInsnNode): Boolean = { + isVarInstruction(insn) || { + (insn.getOpcode: @switch) match { + case DUP | DUP_X1 | DUP_X2 | DUP2 | DUP2_X1 | DUP2_X2 | SWAP | CHECKCAST => true + case _ => false + } + } + } + + /** + * Returns the value and its frame slot that `copyOp` copies into `producedSlot`. + * + * Example: + * - copyOp = DUP_X1, assume it produces slots 2,3,4 + * - producedSlot = 3 + * - the result is the value at slot 2 in the frame of `copyOp` + */ + private def copyOperationSourceValue(copyOp: AbstractInsnNode, producedSlot: Int): (SourceValue, Int) = { + val frame = frameAt(copyOp) + + // Index of the produced value. Example: DUP_X1 produces 3 values, so producedIndex is 0, 1 or 2, + // where 0 corresponds to the lowest value on the stack. + def producedIndex(numConsumed: Int) = { + val numUsedSlotsBeforeCopy = frame.stackTop + 1 + producedSlot - (numUsedSlotsBeforeCopy - numConsumed) + } + + def stackValue(n: Int) = (frame.peekStack(n), frame.stackTop - n) + + def dupX1Case = (producedIndex(2): @switch) match { + case 0 | 2 => stackValue(0) + case 1 => stackValue(1) + } + + // Form 1 of dup_x2 + def dupX2Case = (producedIndex(3): @switch) match { + case 0 | 3 => stackValue(0) + case 1 => stackValue(2) + case 2 => stackValue(1) + } + + // Form 1 of dup2_x1 + def dup2X1Case = (producedIndex(3): @switch) match { + case 0 | 3 => stackValue(1) + case 1 | 4 => stackValue(0) + case 2 => stackValue(2) + } + + if (isLoad(copyOp)) { + val slot = copyOp.asInstanceOf[VarInsnNode].`var` + (frame.getLocal(slot), slot) + } else if (isStore(copyOp)) { + stackValue(0) + } else (copyOp.getOpcode: @switch) match { + case DUP => + stackValue(0) // the current stack top is the source of both produced values + + case DUP_X1 => + dupX1Case + + case DUP_X2 => + if (frame.peekStack(1).getSize == 2) dupX1Case + else dupX2Case + + case DUP2 => + if (frame.peekStack(0).getSize == 2) stackValue(0) + else { + (producedIndex(2): @switch) match { + case 0 | 2 => stackValue(1) + case 1 | 3 => stackValue(0) + } + } + + case DUP2_X1 => + if (frame.peekStack(0).getSize == 2) dupX1Case + else dup2X1Case + + case DUP2_X2 => + val v1isSize2 = frame.peekStack(0).getSize == 2 + if (v1isSize2) { + val v2isSize2 = frame.peekStack(1).getSize == 2 + if (v2isSize2) dupX1Case // Form 4 + else dupX2Case // Form 2 + } else { + val v3isSize2 = frame.peekStack(2).getSize == 2 + if (v3isSize2) dup2X1Case // Form 3 + else { + // Form 1 + (producedIndex(4): @switch) match { + case 0 | 4 => stackValue(1) + case 1 | 5 => stackValue(0) + case 2 => stackValue(3) + case 3 => stackValue(2) + } + } + } + + case SWAP => + if (producedIndex(2) == 0) stackValue(0) + else stackValue(1) + + case CHECKCAST => + stackValue(0) + } + } + + /** + * Returns the value slots into which `copyOp` copies the value at `consumedSlot`. + * + * Example: + * - copyOp = DUP_X1, assume it consumes slots 2,3 and produces 2,3,4 + * - if consumedSlot == 2, the result is Set(3) + * - if consumedSlot == 3, the result is Set(2, 4) + */ + private def copyOperationProducedValueSlots(copyOp: AbstractInsnNode, consumedSlot: Int): Set[Int] = { + if (isStore(copyOp)) Set(copyOp.asInstanceOf[VarInsnNode].`var`) + else { + val nextFrame = frameAt(copyOp.getNext) + val top = nextFrame.stackTop + + // Index of the consumed value. Example: DUP_X1 consumes two values, so consumedIndex is + // 0 or 1, where 0 corresponds to the lower value on the stack. + def consumedIndex(numProduced: Int) = { + val numUsedSlotsAfterCopy = top + 1 + consumedSlot - (numUsedSlotsAfterCopy - numProduced) + } + + def dupX1Case = (consumedIndex(3): @switch) match { + case 0 => Set(top - 1) + case 1 => Set(top - 2, top) + } + + def dupX2Case = (consumedIndex(4): @switch) match { + case 0 => Set(top - 2) + case 1 => Set(top - 1) + case 2 => Set(top - 3, top) + } + + def dup2X1Case = (consumedIndex(5): @switch) match { + case 0 => Set(top - 2) + case 1 => Set(top - 4, top - 1) + case 2 => Set(top - 3, top) + } + + if (isLoad(copyOp)) Set(top) + else (copyOp.getOpcode: @switch) match { + case DUP => + Set(top - 1, top) + + case DUP_X1 => + dupX1Case + + case DUP_X2 => + if (nextFrame.peekStack(1).getSize == 2) dupX1Case + else dupX2Case + + case DUP2 => + if (nextFrame.peekStack(0).getSize == 2) Set(top - 1, top) + else (consumedIndex(4): @switch) match { + case 0 => Set(top - 3, top - 1) + case 1 => Set(top - 2, top) + } + + case DUP2_X1 => + if (nextFrame.peekStack(0).getSize == 2) dupX1Case + else dup2X1Case + + case DUP2_X2 => + val v1isSize2 = nextFrame.peekStack(0).getSize == 2 + if (v1isSize2) { + val v2isSize2 = nextFrame.peekStack(1).getSize == 2 + if (v2isSize2) dupX1Case // Form 4 + else dupX2Case // Form 2 + } else { + val v3isSize2 = nextFrame.peekStack(2).getSize == 2 + if (v3isSize2) dup2X1Case // Form 3 + else { + // Form 1 + (consumedIndex(6): @switch) match { + case 0 => Set(top - 3) + case 1 => Set(top - 2) + case 2 => Set(top - 5, top - 1) + case 3 => Set(top - 4, top) + } + } + } + + case SWAP => + if (consumedIndex(2) == 0) Set(top) + else Set(top - 1) + + case CHECKCAST => + Set(top) + } + } + } + + /** Returns the frame values consumed by executing `insn`. */ + private def inputValues(insn: AbstractInsnNode): Seq[SourceValue] = { + lazy val frame = frameAt(insn) + inputValueSlots(insn) map frame.getValue + } + + /** Returns the frame slots holding the values consumed by executing `insn`. */ + private def inputValueSlots(insn: AbstractInsnNode): Seq[Int] = { + if (insn.getOpcode == -1) return Seq.empty + if (isLoad(insn)) { + Seq(insn.asInstanceOf[VarInsnNode].`var`) + } else if (insn.getOpcode == IINC) { + Seq(insn.asInstanceOf[IincInsnNode].`var`) + } else { + val frame = frameAt(insn) + val stackEffect = InstructionStackEffect(insn, frame) + val stackSize = frame.getLocals + frame.getStackSize + (stackSize - stackEffect._1) until stackSize + } + } + + /** Returns the frame slots holding the values produced by executing `insn`. */ + private def outputValueSlots(insn: AbstractInsnNode): Seq[Int] = insn match { + case ParameterProducer(local) => Seq(local) + case UninitializedLocalProducer(local) => Seq(local) + case ExceptionProducer(frame) => Seq(frame.stackTop) + case _ => + if (insn.getOpcode == -1) return Seq.empty + if (isStore(insn)) { + Seq(insn.asInstanceOf[VarInsnNode].`var`) + } else if (insn.getOpcode == IINC) { + Seq(insn.asInstanceOf[IincInsnNode].`var`) + } else { + val frame = frameAt(insn) + val stackEffect = InstructionStackEffect(insn, frame) + val nextFrame = frameAt(insn.getNext) + val stackSize = nextFrame.getLocals + nextFrame.getStackSize + (stackSize - stackEffect._2) until stackSize + } + } + + /** For each instruction, a set of potential consumers of the produced values. */ + private lazy val _consumersOfOutputsFrom: Map[AbstractInsnNode, Vector[Set[AbstractInsnNode]]] = { + var res = Map.empty[AbstractInsnNode, Vector[Set[AbstractInsnNode]]] + for { + insn <- methodNode.instructions.iterator.asScala + frame = frameAt(insn) + i <- inputValueSlots(insn) + producer <- frame.getValue(i).insns.asScala + } { + val producedSlots = outputValueSlots(producer) + val currentConsumers = res.getOrElse(producer, Vector.fill(producedSlots.size)(Set.empty[AbstractInsnNode])) + val outputIndex = producedSlots.indexOf(i) + res = res.updated(producer, currentConsumers.updated(outputIndex, currentConsumers(outputIndex) + insn)) + } + res + } + + private val _initialProducersCache: mutable.AnyRefMap[(AbstractInsnNode, Int), Set[AbstractInsnNode]] = mutable.AnyRefMap.empty + private val _ultimateConsumersCache: mutable.AnyRefMap[(AbstractInsnNode, Int), Set[AbstractInsnNode]] = mutable.AnyRefMap.empty +} + +/** + * A class for pseudo-instructions representing the initial producers of local values that have + * no producer instruction in the method: + * - parameters, including `this` + * - uninitialized local variables + * - exception values in handlers + * + * The ASM built-in SourceValue analysis yields an empty producers set for such values. This leads + * to ambiguities. Example (in Java one can re-assign parameter): + * + * void foo(int a) { + * if (a == 0) a = 1; + * return a; + * } + * + * In the first frame of the method, the SoruceValue for parameter `a` gives an empty set of + * producer instructions. + * + * In the frame of the `IRETURN` instruction, the SoruceValue for parameter `a` lists a single + * producer instruction: the `ISTORE 1`. This makes it look as if there was a single producer for + * `a`, where in fact it might still hold the parameter's initial value. + */ +abstract class InitialProducer extends AbstractInsnNode(-1) { + override def getType: Int = throw new UnsupportedOperationException + override def clone(labels: util.Map[LabelNode, LabelNode]): AbstractInsnNode = throw new UnsupportedOperationException + override def accept(cv: MethodVisitor): Unit = throw new UnsupportedOperationException +} + +case class ParameterProducer(local: Int) extends InitialProducer +case class UninitializedLocalProducer(local: Int) extends InitialProducer +case class ExceptionProducer[V <: Value](handlerFrame: Frame[V]) extends InitialProducer + +class InitialProducerSourceInterpreter extends SourceInterpreter { + override def newParameterValue(isInstanceMethod: Boolean, local: Int, tp: Type): SourceValue = { + new SourceValue(tp.getSize, ParameterProducer(local)) + } + + override def newEmptyNonParameterLocalValue(local: Int): SourceValue = { + new SourceValue(1, UninitializedLocalProducer(local)) + } + + override def newExceptionValue(tryCatchBlockNode: TryCatchBlockNode, handlerFrame: Frame[_ <: Value], exceptionType: Type): SourceValue = { + new SourceValue(1, ExceptionProducer(handlerFrame)) + } +} \ No newline at end of file diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/package.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/package.scala index 402357c55b..c50648acd9 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/package.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/package.scala @@ -26,7 +26,8 @@ package scala.tools.nsc.backend.jvm * - A `top` index stores the index of the current stack top * - NOTE: for a size-2 local variable at index i, the local variable at i+1 is set to an empty * value. However, for a size-2 value at index i on the stack, the value at i+1 holds the next - * stack value. + * stack value. IMPORTANT: this is only the case in ASM's analysis framework, not in bytecode. + * See comment below. * - Defines the `execute(instruction)` method. * - executing mutates the state of the frame according to the effect of the instruction * - pop consumed values from the stack @@ -58,6 +59,50 @@ package scala.tools.nsc.backend.jvm * - the empty method `newControlFlowEdge` can be overridden to track control flow if required * * + * MaxLocals and MaxStack + * ---------------------- + * + * At the JVM level, long and double values occupy two slots, both as local variables and on the + * stack, as specified in the JVM spec 2.6.2: + * "At any point in time, an operand stack has an associated depth, where a value of type long or + * double contributes two units to the depth and a value of any other type contributes one unit." + * + * For example, a method + * class A { def f(a: Long, b: Long) = a + b } + * has MAXSTACK=4 in the classfile. This value is computed by the ClassWriter / MethodWriter when + * generating the classfile (we always pass COMPUTE_MAXS to the ClassWriter). + * + * For running an ASM Analyzer, long and double values occupy two local variable slots, but only + * a single slot on the call stack, as shown by the following snippet: + * + * import scala.tools.nsc.backend.jvm._ + * import scala.tools.nsc.backend.jvm.opt.BytecodeUtils._ + * import scala.collection.convert.decorateAsScala._ + * import scala.tools.asm.tree.analysis._ + * + * val cn = AsmUtils.readClass("/Users/luc/scala/scala/sandbox/A.class") + * val m = cn.methods.iterator.asScala.find(_.name == "f").head + * + * // the value is read from the classfile, so it's 4 + * println(s"maxLocals: ${m.maxLocals}, maxStack: ${m.maxStack}") // maxLocals: 5, maxStack: 4 + * + * // we can safely set it to 2 for running the analyzer. + * m.maxStack = 2 + * + * val a = new Analyzer(new BasicInterpreter) + * a.analyze(cn.name, m) + * val addInsn = m.instructions.iterator.asScala.find(_.getOpcode == 97).get // LADD Opcode + * val addFrame = a.frameAt(addInsn, m) + * + * addFrame.getStackSize // 2: the two long values only take one slot each + * addFrame.getLocals // 5: this takes one slot, the two long parameters take 2 slots each + * + * + * While running the optimizer, we need to make sure that the `maxStack` value of a method is + * large enough for running an ASM analyzer. We don't need to worry if the value is incorrect in + * the JVM perspective: the value will be re-computed and overwritten in the ClassWriter. + * + * * Lessons learnt while benchmarking the alias tracking analysis * ------------------------------------------------------------- * diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/BytecodeUtils.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/BytecodeUtils.scala index ede572dfb4..ea186f9a1b 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/BytecodeUtils.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/BytecodeUtils.scala @@ -173,6 +173,11 @@ object BytecodeUtils { case Opcodes.IFNONNULL => Opcodes.IFNULL } + def isSize2LoadOrStore(opcode: Int): Boolean = (opcode: @switch) match { + case Opcodes.LLOAD | Opcodes.DLOAD | Opcodes.LSTORE | Opcodes.DSTORE => true + case _ => false + } + def getPop(size: Int): InsnNode = { val op = if (size == 1) Opcodes.POP else Opcodes.POP2 new InsnNode(op) @@ -222,29 +227,6 @@ object BytecodeUtils { } } - /** - * In order to run an Analyzer, the maxLocals / maxStack fields need to be available. The ASM - * framework only computes these values during bytecode generation. - * - * Since there's currently no better way, we run a bytecode generator on the method and extract - * the computed values. This required changes to the ASM codebase: - * - the [[MethodWriter]] class was made public - * - accessors for maxLocals / maxStack were added to the MethodWriter class - * - * We could probably make this faster (and allocate less memory) by hacking the ASM framework - * more: create a subclass of MethodWriter with a /dev/null byteVector. Another option would be - * to create a separate visitor for computing those values, duplicating the functionality from the - * MethodWriter. - */ - def computeMaxLocalsMaxStack(method: MethodNode): Unit = { - val cw = new ClassWriter(ClassWriter.COMPUTE_MAXS) - val excs = method.exceptions.asScala.toArray - val mw = cw.visitMethod(method.access, method.name, method.desc, method.signature, excs).asInstanceOf[MethodWriter] - method.accept(mw) - method.maxLocals = mw.getMaxLocals - method.maxStack = mw.getMaxStack - } - def codeSizeOKForInlining(caller: MethodNode, callee: MethodNode): Boolean = { // Looking at the implementation of CodeSizeEvaluator, all instructions except tableswitch and // lookupswitch are <= 8 bytes. These should be rare enough for 8 to be an OK rough upper bound. @@ -352,15 +334,6 @@ object BytecodeUtils { } } - /** - * A wrapper to make ASM's Analyzer a bit easier to use. - */ - class AsmAnalyzer[V <: Value](methodNode: MethodNode, classInternalName: InternalName, interpreter: Interpreter[V] = new BasicInterpreter) { - val analyzer = new Analyzer(interpreter) - analyzer.analyze(classInternalName, methodNode) - def frameAt(instruction: AbstractInsnNode): Frame[V] = analyzer.frameAt(instruction, methodNode) - } - implicit class AnalyzerExtensions[V <: Value](val analyzer: Analyzer[V]) extends AnyVal { def frameAt(instruction: AbstractInsnNode, methodNode: MethodNode): Frame[V] = analyzer.getFrames()(methodNode.instructions.indexOf(instruction)) } diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala index 87b51c55b4..535a46f362 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala @@ -9,7 +9,6 @@ package opt import scala.collection.immutable.IntMap import scala.reflect.internal.util.{NoPosition, Position} -import scala.tools.asm.tree.analysis.{Value, Analyzer, BasicInterpreter} import scala.tools.asm.{Opcodes, Type, Handle} import scala.tools.asm.tree._ import scala.collection.{concurrent, mutable} @@ -22,6 +21,7 @@ import BytecodeUtils._ class CallGraph[BT <: BTypes](val btypes: BT) { import btypes._ + import analyzers._ /** * The call graph contains the callsites in the program being compiled. @@ -97,13 +97,12 @@ class CallGraph[BT <: BTypes](val btypes: BT) { // It is also used to get the stack height at the call site. localOpt.minimalRemoveUnreachableCode(methodNode, definingClass.internalName) - val analyzer: Analyzer[_ <: Value] = { - if (compilerSettings.YoptNullnessTracking) new NullnessAnalyzer - else new Analyzer(new BasicInterpreter) + val analyzer = { + if (compilerSettings.YoptNullnessTracking) new AsmAnalyzer(methodNode, definingClass.internalName, new NullnessAnalyzer) + else new AsmAnalyzer(methodNode, definingClass.internalName) } - analyzer.analyze(definingClass.internalName, methodNode) - def receiverNotNullByAnalysis(call: MethodInsnNode, numArgs: Int) = analyzer match { + def receiverNotNullByAnalysis(call: MethodInsnNode, numArgs: Int) = analyzer.analyzer match { case nullnessAnalyzer: NullnessAnalyzer => val frame = nullnessAnalyzer.frameAt(call, methodNode) frame.getStack(frame.getStackSize - 1 - numArgs) eq NotNullValue @@ -149,7 +148,7 @@ class CallGraph[BT <: BTypes](val btypes: BT) { callsiteClass = definingClass, callee = callee, argInfos = argInfos, - callsiteStackHeight = analyzer.frameAt(call, methodNode).getStackSize, + callsiteStackHeight = analyzer.frameAt(call).getStackSize, receiverKnownNotNull = receiverNotNull, callsitePosition = callsitePositions.getOrElse(call, NoPosition) ) diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala index 1a1a6942dc..23b85d175e 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala @@ -14,7 +14,6 @@ import scala.reflect.internal.util.NoPosition import scala.tools.asm.{Type, Opcodes} import scala.tools.asm.tree._ import scala.tools.nsc.backend.jvm.BTypes.InternalName -import scala.tools.nsc.backend.jvm.analysis.ProdConsAnalyzer import BytecodeUtils._ import BackendReporting._ import Opcodes._ @@ -24,6 +23,7 @@ import scala.collection.convert.decorateAsScala._ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { import btypes._ import callGraph._ + import analyzers._ /** * If a closure is allocated and invoked within the same method, re-write the invocation to the @@ -204,7 +204,8 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { insertLoadOps(invocation, ownerMethod, argumentLocalsList) // update maxStack - val numCapturedValues = localsForCapturedValues.locals.length // not `localsForCapturedValues.size`: every value takes 1 slot on the stack (also long / double), JVMS 2.6.2 + // One slot per value is correct for long / double, see comment in the `analysis` package object. + val numCapturedValues = localsForCapturedValues.locals.length val invocationStackHeight = stackHeight + numCapturedValues - 1 // -1 because the closure is gone if (invocationStackHeight > ownerMethod.maxStack) ownerMethod.maxStack = invocationStackHeight diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala index 8dff7ef446..36ace2f4f9 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala @@ -8,9 +8,7 @@ package backend.jvm package opt import scala.annotation.tailrec -import scala.collection.immutable.IntMap import scala.tools.asm -import asm.Handle import asm.Opcodes._ import asm.tree._ import scala.collection.convert.decorateAsScala._ @@ -18,7 +16,7 @@ import scala.collection.convert.decorateAsJava._ import AsmUtils._ import BytecodeUtils._ import collection.mutable -import scala.tools.asm.tree.analysis.SourceInterpreter +import scala.tools.asm.tree.analysis.{Analyzer, SourceInterpreter} import BackendReporting._ import scala.tools.nsc.backend.jvm.BTypes.InternalName @@ -26,6 +24,7 @@ class Inliner[BT <: BTypes](val btypes: BT) { import btypes._ import callGraph._ import inlinerHeuristics._ + import analyzers._ def eliminateUnreachableCodeAndUpdateCallGraph(methodNode: MethodNode, definingClass: InternalName): Unit = { localOpt.minimalRemoveUnreachableCode(methodNode, definingClass) foreach { @@ -146,7 +145,7 @@ class Inliner[BT <: BTypes](val btypes: BT) { if (!selfTypeOk) { // there's no need to run eliminateUnreachableCode here. building the call graph does that // already, no code can become unreachable in the meantime. - val analyzer = new AsmAnalyzer(callsite.callsiteMethod, callsite.callsiteClass.internalName, new SourceInterpreter) + val analyzer = new AsmAnalyzer(callsite.callsiteMethod, callsite.callsiteClass.internalName, new Analyzer(new SourceInterpreter)) val receiverValue = analyzer.frameAt(callsite.callsiteInstruction).peekStack(traitMethodArgumentTypes.length) for (i <- receiverValue.insns.asScala) { val cast = new TypeInsnNode(CHECKCAST, selfParamType.internalName) @@ -410,8 +409,20 @@ class Inliner[BT <: BTypes](val btypes: BT) { callsiteMethod.tryCatchBlocks.addAll(cloneTryCatchBlockNodes(callee, labelsMap).asJava) callsiteMethod.maxLocals += returnType.getSize + callee.maxLocals - val numStoredArgs = calleeParamTypes.length + (if (isStaticMethod(callee)) 0 else 1) // every value takes 1 slot on the stack (also long / double), JVMS 2.6.2 - callsiteMethod.maxStack = math.max(callsiteMethod.maxStack, callee.maxStack + callsiteStackHeight - numStoredArgs) + val maxStackOfInlinedCode = { + // One slot per value is correct for long / double, see comment in the `analysis` package object. + val numStoredArgs = calleeParamTypes.length + (if (isStaticMethod(callee)) 0 else 1) + callee.maxStack + callsiteStackHeight - numStoredArgs + } + val stackHeightAtNullCheck = { + // When adding a null check for the receiver, a DUP is inserted, which might cause a new maxStack. + // If the callsite has other argument values than the receiver on the stack, these are pop'ed + // and stored into locals before the null check, so in that case the maxStack doesn't grow. + val stackSlotForNullCheck = if (!isStaticMethod(callee) && !receiverKnownNotNull && calleeParamTypes.isEmpty) 1 else 0 + callsiteStackHeight + stackSlotForNullCheck + } + + callsiteMethod.maxStack = math.max(callsiteMethod.maxStack, math.max(stackHeightAtNullCheck, maxStackOfInlinedCode)) callGraph.addIfMissing(callee, calleeDeclarationClass) diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala index f5e6653344..fe398ce652 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala @@ -8,8 +8,7 @@ package backend.jvm package opt import scala.annotation.switch -import scala.tools.asm.Opcodes -import scala.tools.asm.tree.analysis.{Analyzer, BasicInterpreter} +import scala.tools.asm.{Type, ClassWriter, MethodWriter, Opcodes} import scala.tools.asm.tree._ import scala.collection.convert.decorateAsScala._ import scala.tools.nsc.backend.jvm.BTypes.InternalName @@ -49,6 +48,39 @@ import scala.tools.nsc.backend.jvm.opt.BytecodeUtils._ class LocalOpt[BT <: BTypes](val btypes: BT) { import LocalOptImpls._ import btypes._ + import analyzers._ + + /** + * In order to run an Analyzer, the maxLocals / maxStack fields need to be available. The ASM + * framework only computes these values during bytecode generation. + * + * Since there's currently no better way, we run a bytecode generator on the method and extract + * the computed values. This required changes to the ASM codebase: + * - the [[MethodWriter]] class was made public + * - accessors for maxLocals / maxStack were added to the MethodWriter class + * + * We could probably make this faster (and allocate less memory) by hacking the ASM framework + * more: create a subclass of MethodWriter with a /dev/null byteVector. Another option would be + * to create a separate visitor for computing those values, duplicating the functionality from the + * MethodWriter. + * + * NOTE: the maxStack value computed by this method allocates two slots for long / double values, + * as required by the JVM spec. For running an Analyzer, one slot per long / double would be fine. + * See comment in `analysis` package object. + */ + def computeMaxLocalsMaxStack(method: MethodNode): Unit = { + if (!maxLocalsMaxStackComputed(method)) { + method.maxLocals = 0 + method.maxStack = 0 + val cw = new ClassWriter(ClassWriter.COMPUTE_MAXS) + val excs = method.exceptions.asScala.toArray + val mw = cw.visitMethod(method.access, method.name, method.desc, method.signature, excs).asInstanceOf[MethodWriter] + method.accept(mw) + method.maxLocals = mw.getMaxLocals + method.maxStack = mw.getMaxStack + maxLocalsMaxStackComputed += method + } + } /** * Remove unreachable code from a method. @@ -179,46 +211,55 @@ class LocalOpt[BT <: BTypes](val btypes: BT) { codeHandlersOrJumpsChanged || localsRemoved || lineNumbersRemoved || labelsRemoved } -} - -object LocalOptImpls { /** * Removes unreachable basic blocks. * - * TODO: rewrite, don't use computeMaxLocalsMaxStack (runs a ClassWriter) / Analyzer. Too slow. - * * @return A set containing eliminated instructions, and a set containing all live label nodes. */ def removeUnreachableCodeImpl(method: MethodNode, ownerClassName: InternalName): (Set[AbstractInsnNode], Set[LabelNode]) = { - // The data flow analysis requires the maxLocals / maxStack fields of the method to be computed. - computeMaxLocalsMaxStack(method) - val a = new Analyzer(new BasicInterpreter) - a.analyze(ownerClassName, method) - val frames = a.getFrames + val a = new AsmAnalyzer(method, ownerClassName) + val frames = a.analyzer.getFrames var i = 0 var liveLabels = Set.empty[LabelNode] var removedInstructions = Set.empty[AbstractInsnNode] + var maxLocals = Type.getArgumentsAndReturnSizes(method.desc) >> 2 - (if (BytecodeUtils.isStaticMethod(method)) 1 else 0) + var maxStack = 0 val itr = method.instructions.iterator() while (itr.hasNext) { - itr.next() match { - case l: LabelNode => - if (frames(i) != null) liveLabels += l + val insn = itr.next() + val isLive = frames(i) != null + if (isLive) maxStack = math.max(maxStack, frames(i).getStackSize) - case ins => + insn match { + case l: LabelNode => // label nodes are not removed: they might be referenced for example in a LocalVariableNode - if (frames(i) == null || ins.getOpcode == Opcodes.NOP) { + if (isLive) liveLabels += l + + case v: VarInsnNode if isLive => + val longSize = if (isSize2LoadOrStore(v.getOpcode)) 1 else 0 + maxLocals = math.max(maxLocals, v.`var` + longSize + 1) // + 1 becauase local numbers are 0-based + + case i: IincInsnNode if isLive => + maxLocals = math.max(maxLocals, i.`var` + 1) + + case _ => + if (!isLive || insn.getOpcode == Opcodes.NOP) { // Instruction iterators allow removing during iteration. // Removing is O(1): instructions are doubly linked list elements. itr.remove() - removedInstructions += ins + removedInstructions += insn } } i += 1 } + method.maxLocals = maxLocals + method.maxStack = maxStack (removedInstructions, liveLabels) } +} +object LocalOptImpls { /** * Remove exception handlers that cover empty code blocks. A block is considered empty if it * consist only of labels, frames, line numbers, nops and gotos. @@ -311,10 +352,7 @@ object LocalOptImpls { // Add the index of the local variable used by `varIns` to the `renumber` array. def addVar(varIns: VarInsnNode): Unit = { val index = varIns.`var` - val isWide = (varIns.getOpcode: @switch) match { - case Opcodes.LLOAD | Opcodes.DLOAD | Opcodes.LSTORE | Opcodes.DSTORE => true - case _ => false - } + val isWide = isSize2LoadOrStore(varIns.getOpcode) // Ensure the length of `renumber`. Unused variable indices are mapped to -1. val minLength = if (isWide) index + 2 else index + 1 diff --git a/test/junit/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzerTest.scala b/test/junit/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzerTest.scala index 9cf6e366d2..f78d450db1 100644 --- a/test/junit/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzerTest.scala +++ b/test/junit/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzerTest.scala @@ -31,25 +31,22 @@ object NullnessAnalyzerTest extends ClearAfterClass.Clearable { class NullnessAnalyzerTest extends ClearAfterClass { ClearAfterClass.stateToClear = NullnessAnalyzerTest val noOptCompiler = NullnessAnalyzerTest.noOptCompiler + import noOptCompiler.genBCode.bTypes.analyzers._ - def newNullnessAnalyzer(methodNode: MethodNode, classInternalName: InternalName = "C"): NullnessAnalyzer = { - val nullnessAnalyzer = new NullnessAnalyzer - nullnessAnalyzer.analyze(classInternalName, methodNode) - nullnessAnalyzer - } + def newNullnessAnalyzer(methodNode: MethodNode, classInternalName: InternalName = "C") = new AsmAnalyzer(methodNode, classInternalName, new NullnessAnalyzer) - def testNullness(analyzer: NullnessAnalyzer, method: MethodNode, query: String, index: Int, nullness: NullnessValue): Unit = { + def testNullness(analyzer: AsmAnalyzer[NullnessValue], method: MethodNode, query: String, index: Int, nullness: NullnessValue): Unit = { for (i <- findInstr(method, query)) { - val r = analyzer.frameAt(i, method).getValue(index) + val r = analyzer.frameAt(i).getValue(index) assertTrue(s"Expected: $nullness, found: $r. At instr ${textify(i)}", nullness == r) } } // debug / helper for writing tests - def showAllNullnessFrames(analyzer: NullnessAnalyzer, method: MethodNode): String = { + def showAllNullnessFrames(analyzer: AsmAnalyzer[NullnessValue], method: MethodNode): String = { val instrLength = method.instructions.iterator.asScala.map(textify(_).length).max val lines = for (i <- method.instructions.iterator.asScala) yield { - val f = analyzer.frameAt(i, method) + val f = analyzer.frameAt(i) val frameString = { if (f == null) "null" else (0 until (f.getLocals + f.getStackSize)).iterator diff --git a/test/junit/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzerTest.scala b/test/junit/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzerTest.scala index a5b3faced8..72f26d231d 100644 --- a/test/junit/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzerTest.scala +++ b/test/junit/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzerTest.scala @@ -26,6 +26,7 @@ object ProdConsAnalyzerTest extends ClearAfterClass.Clearable { class ProdConsAnalyzerTest extends ClearAfterClass { ClearAfterClass.stateToClear = ProdConsAnalyzerTest val noOptCompiler = ProdConsAnalyzerTest.noOptCompiler + import noOptCompiler.genBCode.bTypes.analyzers._ def prodToString(producer: AbstractInsnNode) = producer match { case p: InitialProducer => p.toString diff --git a/test/junit/scala/tools/nsc/backend/jvm/opt/ClosureOptimizerTest.scala b/test/junit/scala/tools/nsc/backend/jvm/opt/ClosureOptimizerTest.scala index 258813ea68..735bbefbbb 100644 --- a/test/junit/scala/tools/nsc/backend/jvm/opt/ClosureOptimizerTest.scala +++ b/test/junit/scala/tools/nsc/backend/jvm/opt/ClosureOptimizerTest.scala @@ -13,7 +13,6 @@ import org.junit.Assert._ import scala.tools.asm.tree._ import scala.tools.asm.tree.analysis._ -import scala.tools.nsc.backend.jvm.opt.BytecodeUtils.AsmAnalyzer import scala.tools.nsc.io._ import scala.tools.nsc.reporters.StoreReporter import scala.tools.testing.AssertUtil._ diff --git a/test/junit/scala/tools/nsc/backend/jvm/opt/InlineWarningTest.scala b/test/junit/scala/tools/nsc/backend/jvm/opt/InlineWarningTest.scala index 029caa995c..e78b5071e0 100644 --- a/test/junit/scala/tools/nsc/backend/jvm/opt/InlineWarningTest.scala +++ b/test/junit/scala/tools/nsc/backend/jvm/opt/InlineWarningTest.scala @@ -13,7 +13,6 @@ import org.junit.Assert._ import scala.tools.asm.tree._ import scala.tools.asm.tree.analysis._ -import scala.tools.nsc.backend.jvm.opt.BytecodeUtils.AsmAnalyzer import scala.tools.nsc.io._ import scala.tools.nsc.reporters.StoreReporter import scala.tools.testing.AssertUtil._ diff --git a/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala b/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala index aa7184ffd8..622715ab3a 100644 --- a/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala +++ b/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala @@ -14,7 +14,6 @@ import org.junit.Assert._ import scala.tools.asm.tree._ import scala.tools.asm.tree.analysis._ -import scala.tools.nsc.backend.jvm.opt.BytecodeUtils.AsmAnalyzer import scala.tools.nsc.io._ import scala.tools.nsc.reporters.StoreReporter import scala.tools.testing.AssertUtil._ @@ -69,6 +68,7 @@ class InlinerTest extends ClearAfterClass { val compiler = InlinerTest.compiler import compiler.genBCode.bTypes._ + import compiler.genBCode.bTypes.analyzers._ def compile(scalaCode: String, javaCode: List[(String, String)] = Nil, allowMessage: StoreReporter#Info => Boolean = _ => false): List[ClassNode] = { InlinerTest.notPerRun.foreach(_.clear()) diff --git a/test/junit/scala/tools/nsc/backend/jvm/opt/UnreachableCodeTest.scala b/test/junit/scala/tools/nsc/backend/jvm/opt/UnreachableCodeTest.scala index 0ac206669a..86c8baa3c6 100644 --- a/test/junit/scala/tools/nsc/backend/jvm/opt/UnreachableCodeTest.scala +++ b/test/junit/scala/tools/nsc/backend/jvm/opt/UnreachableCodeTest.scala @@ -39,7 +39,7 @@ class UnreachableCodeTest extends ClearAfterClass { def assertEliminateDead(code: (Instruction, Boolean)*): Unit = { val method = genMethod()(code.map(_._1): _*) - LocalOptImpls.removeUnreachableCodeImpl(method, "C") + dceCompiler.genBCode.bTypes.localOpt.removeUnreachableCodeImpl(method, "C") val nonEliminated = instructionsFromMethod(method) val expectedLive = code.filter(_._2).map(_._1).toList assertSameCode(nonEliminated, expectedLive) -- cgit v1.2.3 From 0e2c23a34654da667c6a6c88149b090307bdb2ba Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Wed, 16 Sep 2015 14:54:49 +0200 Subject: Don't run unreachable code elimination when building the call graph It's not necessary to run it for building the call graph. DCE will be run anyway later down the pipeline. --- src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala | 5 ++--- src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala | 3 +-- src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala index 535a46f362..aea4058752 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala @@ -95,7 +95,6 @@ class CallGraph[BT <: BTypes](val btypes: BT) { // For now we run a NullnessAnalyzer. It is used to determine if the receiver of an instance // call is known to be not-null, in which case we don't have to emit a null check when inlining. // It is also used to get the stack height at the call site. - localOpt.minimalRemoveUnreachableCode(methodNode, definingClass.internalName) val analyzer = { if (compilerSettings.YoptNullnessTracking) new AsmAnalyzer(methodNode, definingClass.internalName, new NullnessAnalyzer) @@ -117,7 +116,7 @@ class CallGraph[BT <: BTypes](val btypes: BT) { lazy val prodCons = new ProdConsAnalyzer(methodNode, definingClass.internalName) methodNode.instructions.iterator.asScala foreach { - case call: MethodInsnNode => + case call: MethodInsnNode if analyzer.frameAt(call) != null => // skips over unreachable code val callee: Either[OptimizerWarning, Callee] = for { (method, declarationClass) <- byteCodeRepository.methodNode(call.owner, call.name, call.desc): Either[OptimizerWarning, (MethodNode, InternalName)] (declarationClassNode, source) <- byteCodeRepository.classNodeAndSource(declarationClass): Either[OptimizerWarning, (ClassNode, Source)] @@ -153,7 +152,7 @@ class CallGraph[BT <: BTypes](val btypes: BT) { callsitePosition = callsitePositions.getOrElse(call, NoPosition) ) - case LambdaMetaFactoryCall(indy, samMethodType, implMethod, instantiatedMethodType) => + case LambdaMetaFactoryCall(indy, samMethodType, implMethod, instantiatedMethodType) if analyzer.frameAt(indy) != null => val lmf = LambdaMetaFactoryCall(indy, samMethodType, implMethod, instantiatedMethodType) val capturedArgInfos = computeCapturedArgInfos(lmf, prodCons) methodClosureInstantiations += indy -> ClosureInstantiation( diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala index 36ace2f4f9..4c36377128 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala @@ -143,8 +143,7 @@ class Inliner[BT <: BTypes](val btypes: BT) { // VerifyError. We run a `SourceInterpreter` to find all producer instructions of the // receiver value and add a cast to the self type after each. if (!selfTypeOk) { - // there's no need to run eliminateUnreachableCode here. building the call graph does that - // already, no code can become unreachable in the meantime. + localOpt.minimalRemoveUnreachableCode(callsite.callsiteMethod, callsite.callsiteClass.internalName) val analyzer = new AsmAnalyzer(callsite.callsiteMethod, callsite.callsiteClass.internalName, new Analyzer(new SourceInterpreter)) val receiverValue = analyzer.frameAt(callsite.callsiteInstruction).peekStack(traitMethodArgumentTypes.length) for (i <- receiverValue.insns.asScala) { diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala index fe398ce652..684ef30adf 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala @@ -87,7 +87,7 @@ class LocalOpt[BT <: BTypes](val btypes: BT) { * * This implementation only removes instructions that are unreachable for an ASM analyzer / * interpreter. This ensures that future analyses will not produce `null` frames. The inliner - * and call graph builder depend on this property. + * depends on this property. * * @return A set containing the eliminated instructions */ -- cgit v1.2.3 From c7112d997452fc6d0e7c69a1fec4928bd6c072ad Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Wed, 16 Sep 2015 14:59:27 +0200 Subject: Avoid running data flow analyses on very large methods Data flow analyses take a long time to converge (many seconds) when analyzing large methods. This commit prevents running analyses that would take too long. This means for example that callsites in very large methods are not added to the call graph and will therefore not be inlined. --- .../tools/nsc/backend/jvm/analysis/Analyzers.scala | 19 +++ .../tools/nsc/backend/jvm/opt/CallGraph.scala | 133 +++++++++++---------- .../nsc/backend/jvm/opt/ClosureOptimizer.scala | 2 + .../scala/tools/nsc/backend/jvm/opt/Inliner.scala | 4 + .../scala/tools/nsc/backend/jvm/opt/LocalOpt.scala | 4 +- 5 files changed, 100 insertions(+), 62 deletions(-) (limited to 'src') diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/Analyzers.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/Analyzers.scala index 3734c63e93..bb5c6e3820 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/Analyzers.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/Analyzers.scala @@ -25,5 +25,24 @@ class Analyzers[BT <: BTypes](val btypes: BT) { def frameAt(instruction: AbstractInsnNode): Frame[V] = analyzer.frameAt(instruction, methodNode) } + /** + * See the doc comment on package object `analysis` for a discussion on performance. + */ + object AsmAnalyzer { + // jvm limit is 65535 for both number of instructions and number of locals + + private def size(method: MethodNode) = method.instructions.size.toLong * method.maxLocals * method.maxLocals + + // with the limits below, analysis should not take more than one second + + private val nullnessSizeLimit = 5000l * 600l * 600l // 5000 insns, 600 locals + private val basicValueSizeLimit = 9000l * 1000l * 1000l + private val sourceValueSizeLimit = 8000l * 950l * 950l + + def sizeOKForNullness(method: MethodNode): Boolean = size(method) < nullnessSizeLimit + def sizeOKForBasicValue(method: MethodNode): Boolean = size(method) < basicValueSizeLimit + def sizeOKForSourceValue(method: MethodNode): Boolean = size(method) < sourceValueSizeLimit + } + class ProdConsAnalyzer(val methodNode: MethodNode, classInternalName: InternalName) extends AsmAnalyzer(methodNode, classInternalName, new Analyzer(new InitialProducerSourceInterpreter)) with ProdConsAnalyzerImpl } diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala index aea4058752..b9788c3f56 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala @@ -30,6 +30,11 @@ class CallGraph[BT <: BTypes](val btypes: BT) { * finding callsites efficiently. For example, an inlining heuristic might want to know all * callsites withing a callee method. * + * Note that the call graph is not guaranteed to be complete: callsites may be missing. In + * particular, if a method is very large, all of its callsites might not be in the hash map. + * The reason is that adding a method to the call graph requires running an ASM analyzer, which + * can be too slow. + * * Note that call graph entries (Callsite instances) keep a reference to the invocation * MethodInsnNode, which keeps all AbstractInsnNodes of the method reachable. Adding classes * from the classpath to the call graph (in addition to classes being compiled) may prevent @@ -97,75 +102,81 @@ class CallGraph[BT <: BTypes](val btypes: BT) { // It is also used to get the stack height at the call site. val analyzer = { - if (compilerSettings.YoptNullnessTracking) new AsmAnalyzer(methodNode, definingClass.internalName, new NullnessAnalyzer) - else new AsmAnalyzer(methodNode, definingClass.internalName) + if (compilerSettings.YoptNullnessTracking && AsmAnalyzer.sizeOKForNullness(methodNode)) { + Some(new AsmAnalyzer(methodNode, definingClass.internalName, new NullnessAnalyzer)) + } else if (AsmAnalyzer.sizeOKForBasicValue(methodNode)) { + Some(new AsmAnalyzer(methodNode, definingClass.internalName)) + } else None } - def receiverNotNullByAnalysis(call: MethodInsnNode, numArgs: Int) = analyzer.analyzer match { - case nullnessAnalyzer: NullnessAnalyzer => - val frame = nullnessAnalyzer.frameAt(call, methodNode) - frame.getStack(frame.getStackSize - 1 - numArgs) eq NotNullValue + // if the method is too large to run an analyzer, it is not added to the call graph + if (analyzer.nonEmpty) { + val Some(a) = analyzer + def receiverNotNullByAnalysis(call: MethodInsnNode, numArgs: Int) = a.analyzer match { + case nullnessAnalyzer: NullnessAnalyzer => + val frame = nullnessAnalyzer.frameAt(call, methodNode) + frame.getStack(frame.getStackSize - 1 - numArgs) eq NotNullValue + case _ => false + } - case _ => false - } + var methodCallsites = Map.empty[MethodInsnNode, Callsite] + var methodClosureInstantiations = Map.empty[InvokeDynamicInsnNode, ClosureInstantiation] + + // lazy so it is only computed if actually used by computeArgInfos + lazy val prodCons = new ProdConsAnalyzer(methodNode, definingClass.internalName) + + methodNode.instructions.iterator.asScala foreach { + case call: MethodInsnNode if a.frameAt(call) != null => // skips over unreachable code + val callee: Either[OptimizerWarning, Callee] = for { + (method, declarationClass) <- byteCodeRepository.methodNode(call.owner, call.name, call.desc): Either[OptimizerWarning, (MethodNode, InternalName)] + (declarationClassNode, source) <- byteCodeRepository.classNodeAndSource(declarationClass): Either[OptimizerWarning, (ClassNode, Source)] + declarationClassBType = classBTypeFromClassNode(declarationClassNode) + } yield { + val CallsiteInfo(safeToInline, safeToRewrite, annotatedInline, annotatedNoInline, samParamTypes, warning) = analyzeCallsite(method, declarationClassBType, call.owner, source) + Callee( + callee = method, + calleeDeclarationClass = declarationClassBType, + safeToInline = safeToInline, + safeToRewrite = safeToRewrite, + annotatedInline = annotatedInline, + annotatedNoInline = annotatedNoInline, + samParamTypes = samParamTypes, + calleeInfoWarning = warning) + } - var methodCallsites = Map.empty[MethodInsnNode, Callsite] - var methodClosureInstantiations = Map.empty[InvokeDynamicInsnNode, ClosureInstantiation] - - // lazy so it is only computed if actually used by computeArgInfos - lazy val prodCons = new ProdConsAnalyzer(methodNode, definingClass.internalName) - - methodNode.instructions.iterator.asScala foreach { - case call: MethodInsnNode if analyzer.frameAt(call) != null => // skips over unreachable code - val callee: Either[OptimizerWarning, Callee] = for { - (method, declarationClass) <- byteCodeRepository.methodNode(call.owner, call.name, call.desc): Either[OptimizerWarning, (MethodNode, InternalName)] - (declarationClassNode, source) <- byteCodeRepository.classNodeAndSource(declarationClass): Either[OptimizerWarning, (ClassNode, Source)] - declarationClassBType = classBTypeFromClassNode(declarationClassNode) - } yield { - val CallsiteInfo(safeToInline, safeToRewrite, annotatedInline, annotatedNoInline, samParamTypes, warning) = analyzeCallsite(method, declarationClassBType, call.owner, source) - Callee( - callee = method, - calleeDeclarationClass = declarationClassBType, - safeToInline = safeToInline, - safeToRewrite = safeToRewrite, - annotatedInline = annotatedInline, - annotatedNoInline = annotatedNoInline, - samParamTypes = samParamTypes, - calleeInfoWarning = warning) - } + val argInfos = computeArgInfos(callee, call, prodCons) - val argInfos = computeArgInfos(callee, call, prodCons) + val receiverNotNull = call.getOpcode == Opcodes.INVOKESTATIC || { + val numArgs = Type.getArgumentTypes(call.desc).length + receiverNotNullByAnalysis(call, numArgs) + } - val receiverNotNull = call.getOpcode == Opcodes.INVOKESTATIC || { - val numArgs = Type.getArgumentTypes(call.desc).length - receiverNotNullByAnalysis(call, numArgs) - } + methodCallsites += call -> Callsite( + callsiteInstruction = call, + callsiteMethod = methodNode, + callsiteClass = definingClass, + callee = callee, + argInfos = argInfos, + callsiteStackHeight = a.frameAt(call).getStackSize, + receiverKnownNotNull = receiverNotNull, + callsitePosition = callsitePositions.getOrElse(call, NoPosition) + ) + + case LambdaMetaFactoryCall(indy, samMethodType, implMethod, instantiatedMethodType) if a.frameAt(indy) != null => + val lmf = LambdaMetaFactoryCall(indy, samMethodType, implMethod, instantiatedMethodType) + val capturedArgInfos = computeCapturedArgInfos(lmf, prodCons) + methodClosureInstantiations += indy -> ClosureInstantiation( + lmf, + methodNode, + definingClass, + capturedArgInfos) - methodCallsites += call -> Callsite( - callsiteInstruction = call, - callsiteMethod = methodNode, - callsiteClass = definingClass, - callee = callee, - argInfos = argInfos, - callsiteStackHeight = analyzer.frameAt(call).getStackSize, - receiverKnownNotNull = receiverNotNull, - callsitePosition = callsitePositions.getOrElse(call, NoPosition) - ) - - case LambdaMetaFactoryCall(indy, samMethodType, implMethod, instantiatedMethodType) if analyzer.frameAt(indy) != null => - val lmf = LambdaMetaFactoryCall(indy, samMethodType, implMethod, instantiatedMethodType) - val capturedArgInfos = computeCapturedArgInfos(lmf, prodCons) - methodClosureInstantiations += indy -> ClosureInstantiation( - lmf, - methodNode, - definingClass, - capturedArgInfos) - - case _ => - } + case _ => + } - callsites(methodNode) = methodCallsites - closureInstantiations(methodNode) = methodClosureInstantiations + callsites(methodNode) = methodCallsites + closureInstantiations(methodNode) = methodClosureInstantiations + } } def computeArgInfos(callee: Either[OptimizerWarning, Callee], callsiteInsn: MethodInsnNode, prodCons: => ProdConsAnalyzer): IntMap[ArgInfo] = { diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala index 23b85d175e..5d3b7ff272 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/ClosureOptimizer.scala @@ -78,6 +78,8 @@ class ClosureOptimizer[BT <: BTypes](val btypes: BT) { closureInstantiations.iterator.flatMap({ case (methodNode, closureInits) => // A lazy val to ensure the analysis only runs if necessary (the value is passed by name to `closureCallsites`) + // We don't need to worry about the method being too large for running an analysis: large + // methods are not added to the call graph / closureInstantiations map. lazy val prodCons = new ProdConsAnalyzer(methodNode, closureInits.valuesIterator.next().ownerClass.internalName) val sortedInits = immutable.TreeSet.empty ++ closureInits.values sortedInits.iterator.map(init => (init, closureCallsites(init, prodCons))).filter(_._2.nonEmpty) diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala index 4c36377128..54f1b99876 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala @@ -143,6 +143,8 @@ class Inliner[BT <: BTypes](val btypes: BT) { // VerifyError. We run a `SourceInterpreter` to find all producer instructions of the // receiver value and add a cast to the self type after each. if (!selfTypeOk) { + // We don't need to worry about the method being too large for running an analysis. + // Callsites of large methods are not added to the call graph. localOpt.minimalRemoveUnreachableCode(callsite.callsiteMethod, callsite.callsiteClass.internalName) val analyzer = new AsmAnalyzer(callsite.callsiteMethod, callsite.callsiteClass.internalName, new Analyzer(new SourceInterpreter)) val receiverValue = analyzer.frameAt(callsite.callsiteInstruction).peekStack(traitMethodArgumentTypes.length) @@ -367,6 +369,8 @@ class Inliner[BT <: BTypes](val btypes: BT) { // We run an interpreter to know the stack height at each xRETURN instruction and the sizes // of the values on the stack. + // We don't need to worry about the method being too large for running an analysis. Callsites of + // large methods are not added to the call graph. val analyzer = new AsmAnalyzer(callee, calleeDeclarationClass.internalName) for (originalReturn <- callee.instructions.iterator().asScala if isReturn(originalReturn)) { diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala index 684ef30adf..1e7b46012e 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala @@ -94,6 +94,7 @@ class LocalOpt[BT <: BTypes](val btypes: BT) { def minimalRemoveUnreachableCode(method: MethodNode, ownerClassName: InternalName): Set[AbstractInsnNode] = { if (method.instructions.size == 0) return Set.empty // fast path for abstract methods if (unreachableCodeEliminated(method)) return Set.empty // we know there is no unreachable code + if (!AsmAnalyzer.sizeOKForBasicValue(method)) return Set.empty // the method is too large for running an analyzer // For correctness, after removing unreachable code, we have to eliminate empty exception // handlers, see scaladoc of def methodOptimizations. Removing an live handler may render more @@ -169,9 +170,10 @@ class LocalOpt[BT <: BTypes](val btypes: BT) { // This triggers "ClassFormatError: Illegal exception table range in class file C". Similar // for local variables in dead blocks. Maybe that's a bug in the ASM framework. + def canRunDCE = AsmAnalyzer.sizeOKForBasicValue(method) def removalRound(): Boolean = { // unreachable-code, empty-handlers and simplify-jumps run until reaching a fixpoint (see doc on class LocalOpt) - val (codeRemoved, handlersRemoved, liveHandlerRemoved) = if (compilerSettings.YoptUnreachableCode) { + val (codeRemoved, handlersRemoved, liveHandlerRemoved) = if (compilerSettings.YoptUnreachableCode && canRunDCE) { val (removedInstructions, liveLabels) = removeUnreachableCodeImpl(method, ownerClassName) val removedHandlers = removeEmptyExceptionHandlers(method) (removedInstructions.nonEmpty, removedHandlers.nonEmpty, removedHandlers.exists(h => liveLabels(h.start))) -- cgit v1.2.3 From 59a5d3b8fd036d36afcf6349bc3cc527344981a1 Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Fri, 18 Sep 2015 10:26:01 +0200 Subject: Emit exception handlers for inlined methods in the correct order Handler tables are lists of tuples (try-start, try-end, handler-start, exception-type). When an instruction throws, the first handler in the list that covers the instruction and matches the type is executed. For nested handlers, it is the job of the compiler to add them to the handler table in the correct order. When inlining a method, the handlers of the callee are prepended to the list of handlers in the callsite method. This ensures that the callee's handlers are tested first if an exception is thrown in the inlined code. --- src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala | 6 +++++- test/files/run/inlineHandlers.scala | 7 +++++++ 2 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 test/files/run/inlineHandlers.scala (limited to 'src') diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala index 54f1b99876..20256ca63b 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala @@ -409,7 +409,11 @@ class Inliner[BT <: BTypes](val btypes: BT) { callsiteMethod.instructions.remove(callsiteInstruction) callsiteMethod.localVariables.addAll(cloneLocalVariableNodes(callee, labelsMap, callee.name + "_", localVarShift).asJava) - callsiteMethod.tryCatchBlocks.addAll(cloneTryCatchBlockNodes(callee, labelsMap).asJava) + // prepend the handlers of the callee. the order of handlers matters: when an exception is thrown + // at some instruction, the first handler guarding that instruction and having a matching exception + // type is executed. prepending the callee's handlers makes sure to test those handlers first if + // an exception is thrown in the inlined code. + callsiteMethod.tryCatchBlocks.addAll(0, cloneTryCatchBlockNodes(callee, labelsMap).asJava) callsiteMethod.maxLocals += returnType.getSize + callee.maxLocals val maxStackOfInlinedCode = { diff --git a/test/files/run/inlineHandlers.scala b/test/files/run/inlineHandlers.scala new file mode 100644 index 0000000000..8c672a07b9 --- /dev/null +++ b/test/files/run/inlineHandlers.scala @@ -0,0 +1,7 @@ +object Test { + @noinline def ham: String = throw null + @inline def inner: String = try { ham } catch { case _: NullPointerException => "npe" } + def foo = try inner catch { case e: Throwable => throw e } + + def main(args: Array[String]): Unit = assert(foo == "npe") +} -- cgit v1.2.3 From 41e5ef4396a1526cd8c58157912ffed830a96f1e Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Thu, 17 Sep 2015 20:15:36 +0200 Subject: First version of inliningh heuristics that prefer higher-order methos When invoking a higher-order method and the value passed for the SAM type is either a function literal or a parameter of the callsite method, inline the higher-order method into the callee. This is a first version, the heuristics will be refined further. --- .../nsc/backend/jvm/opt/InlinerHeuristics.scala | 12 ++++++-- .../scala/tools/nsc/settings/ScalaSettings.scala | 4 +-- test/files/run/mapConserve.scala | 2 +- .../tools/nsc/backend/jvm/opt/InlinerTest.scala | 33 ++++++++++++++++++++++ 4 files changed, 46 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala index cd10094204..e559b63c09 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/InlinerHeuristics.scala @@ -84,8 +84,16 @@ class InlinerHeuristics[BT <: BTypes](val bTypes: BT) { if (callee.safeToInline && callee.annotatedInline) Some(InlineRequest(callsite, Nil)) else None -// case "default" => - + case "default" => + val callee = callsite.callee.get + if (callee.safeToInline && !callee.annotatedNoInline) { + val shouldInlineHO = callee.samParamTypes.nonEmpty && (callee.samParamTypes exists { + case (index, _) => callsite.argInfos.contains(index) + }) + + if (shouldInlineHO || callee.annotatedInline) Some(InlineRequest(callsite, Nil)) + else None + } else None } /* diff --git a/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala b/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala index ea9ad1d909..74d152a4cf 100644 --- a/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala +++ b/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala @@ -276,8 +276,8 @@ trait ScalaSettings extends AbsScalaSettings name = "-Yopt-inline-heuristics", helpArg = "strategy", descr = "Set the heuristics for inlining decisions.", - choices = List("at-inline-annotated", "everything"), - default = "at-inline-annotated") + choices = List("at-inline-annotated", "everything", "default"), + default = "default") object YoptWarningsChoices extends MultiChoiceEnumeration { val none = Choice("none" , "No optimizer warnings.") diff --git a/test/files/run/mapConserve.scala b/test/files/run/mapConserve.scala index c17754283a..95cad69954 100644 --- a/test/files/run/mapConserve.scala +++ b/test/files/run/mapConserve.scala @@ -1,5 +1,5 @@ /* - * filter: inliner warnings; re-run with + * filter: inliner warning */ import scala.annotation.tailrec import scala.collection.mutable.ListBuffer diff --git a/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala b/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala index 622715ab3a..8429a583b5 100644 --- a/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala +++ b/test/junit/scala/tools/nsc/backend/jvm/opt/InlinerTest.scala @@ -1101,4 +1101,37 @@ class InlinerTest extends ClearAfterClass { assertInvoke(getSingleMethod(c, "g"), "C", "f") // g itself still has the call to f assert(r.isEmpty, r) } + + /** + * NOTE: if this test fails for you when running within the IDE, it's probably because you're + * using 2.12.0-M2 for compilining within the IDE, which doesn't add SAM information to the + * InlineInfo attribute. So the InlineInfo in the classfile for Function1 doesn't say that + * it's a SAM type. The test passes when running with ant (which does a full bootstrap). + */ + @Test + def inlineHigherOrder(): Unit = { + val code = + """class C { + | final def h(f: Int => Int): Int = f(0) + | def t1 = h(x => x + 1) + | def t2 = { + | val fun = (x: Int) => x + 1 + | h(fun) + | } + | def t3(f: Int => Int) = h(f) + | def t4(f: Int => Int) = { + | val fun = f + | h(fun) + | } + | def t5 = h(Map(0 -> 10)) // not currently inlined + |} + """.stripMargin + + val List(c) = compile(code) + assertInvoke(getSingleMethod(c, "t1"), "C", "C$$$anonfun$1") + assertInvoke(getSingleMethod(c, "t2"), "C", "C$$$anonfun$2") + assertInvoke(getSingleMethod(c, "t3"), "scala/Function1", "apply$mcII$sp") + assertInvoke(getSingleMethod(c, "t4"), "scala/Function1", "apply$mcII$sp") + assertInvoke(getSingleMethod(c, "t5"), "C", "h") + } } -- cgit v1.2.3