summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorLukas Rytz <lukas.rytz@gmail.com>2015-01-19 10:00:04 +0100
committerLukas Rytz <lukas.rytz@gmail.com>2015-03-11 12:53:34 -0700
commit37f7b76710c72360577250f07bd8b5cf55e527cc (patch)
tree606593339c410b055f1b2ca99502d67ba721c1c1 /src
parent0d8b32469ec655f35a31843e1843b8a580e772d1 (diff)
downloadscala-37f7b76710c72360577250f07bd8b5cf55e527cc.tar.gz
scala-37f7b76710c72360577250f07bd8b5cf55e527cc.tar.bz2
scala-37f7b76710c72360577250f07bd8b5cf55e527cc.zip
Integrate the inliner into the backend pipeline
The current heuristics are simple: attempt to inline every method annotated `@inline`. Cycles in the inline request graph are broken in a determinisitc manner. Inlining is then performed starting at the leaves of the inline request graph, i.e., with those callsites where the target method has no callsites to inline. This expansion strategy can make a method grow arbitrarily. We will most likely have to add some thresholds and / or other measures to prevent size issues.
Diffstat (limited to 'src')
-rw-r--r--src/compiler/scala/tools/nsc/backend/jvm/GenBCode.scala22
-rw-r--r--src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala122
-rw-r--r--src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala38
3 files changed, 178 insertions, 4 deletions
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/GenBCode.scala b/src/compiler/scala/tools/nsc/backend/jvm/GenBCode.scala
index 9b3bd7648d..173aa0ca30 100644
--- a/src/compiler/scala/tools/nsc/backend/jvm/GenBCode.scala
+++ b/src/compiler/scala/tools/nsc/backend/jvm/GenBCode.scala
@@ -217,11 +217,26 @@ abstract class GenBCode extends BCodeSyncAndTry {
class Worker2 {
lazy val localOpt = new LocalOpt(settings)
+ def runGlobalOptimizations(): Unit = {
+ import scala.collection.convert.decorateAsScala._
+ q2.asScala foreach {
+ case Item2(_, _, plain, _, _) =>
+ // skip mirror / bean: wd don't inline into tem, and they are not used in the plain class
+ if (plain != null) {
+ localOpt.minimalRemoveUnreachableCode(plain)
+ callGraph.addClass(plain)
+ }
+ }
+ bTypes.inliner.runInliner()
+ }
+
def localOptimizations(classNode: ClassNode): Unit = {
BackendStats.timed(BackendStats.methodOptTimer)(localOpt.methodOptimizations(classNode))
}
def run() {
+ if (settings.YoptInlinerEnabled) runGlobalOptimizations()
+
while (true) {
val item = q2.poll
if (item.isPoison) {
@@ -269,7 +284,12 @@ abstract class GenBCode extends BCodeSyncAndTry {
var arrivalPos = 0
- /*
+ /**
+ * The `run` method is overridden because the backend has a different data flow than the default
+ * phase: the backend does not transform compilation units one by one, but on all units in the
+ * same run. This allows cross-unit optimizations and running some stages of the backend
+ * concurrently on multiple units.
+ *
* A run of the BCodePhase phase comprises:
*
* (a) set-up steps (most notably supporting maps in `BCodeTypes`,
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala
index 7527491e9b..b74c7ba86c 100644
--- a/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala
+++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/Inliner.scala
@@ -7,6 +7,7 @@ package scala.tools.nsc
package backend.jvm
package opt
+import scala.annotation.tailrec
import scala.tools.asm
import asm.Opcodes._
import asm.tree._
@@ -16,11 +17,132 @@ import AsmUtils._
import BytecodeUtils._
import OptimizerReporting._
import scala.tools.asm.tree.analysis._
+import collection.mutable
class Inliner[BT <: BTypes](val btypes: BT) {
import btypes._
import callGraph._
+ def runInliner(): Unit = {
+ for (request <- collectAndOrderInlineRequests) {
+ val Some(callee) = request.callee
+ inline(request.callsiteInstruction, request.callsiteStackHeight, request.callsiteMethod, request.callsiteClass,
+ callee.callee, callee.calleeDeclarationClass,
+ receiverKnownNotNull = false, keepLineNumbers = false)
+ }
+ }
+
+ /**
+ * Ordering for inline requests. Required to make the inliner deterministic:
+ * - Always remove the same request when breaking inlining cycles
+ * - Perform inlinings in a consistent order
+ */
+ object callsiteOrdering extends Ordering[Callsite] {
+ override def compare(x: Callsite, y: Callsite): Int = {
+ val cls = x.callsiteClass.internalName compareTo y.callsiteClass.internalName
+ if (cls != 0) return cls
+
+ val name = x.callsiteMethod.name compareTo y.callsiteMethod.name
+ if (name != 0) return name
+
+ val desc = x.callsiteMethod.desc compareTo y.callsiteMethod.desc
+ if (desc != 0) return desc
+
+ def pos(c: Callsite) = c.callsiteMethod.instructions.indexOf(c.callsiteInstruction)
+ pos(x) - pos(y)
+ }
+ }
+
+ /**
+ * Select callsites from the call graph that should be inlined. The resulting list of inlining
+ * requests is allowed to have cycles, and the callsites can appear in any order.
+ */
+ def selectCallsitesForInlining: List[Callsite] = {
+ callsites.iterator.filter({
+ case (_, callsite) => callsite.callee match {
+ case Some(Callee(callee, _, safeToInline, Some(annotatedInline), _)) =>
+ // TODO: fix inlining from traits.
+ // For trait methods the callee is abstract: "trait T { @inline final def f = 1}".
+ // A callsite (t: T).f is `safeToInline` (effectivelyFinal is true), but the callee is the
+ // abstract method in the interface.
+ !isAbstractMethod(callee) && safeToInline && annotatedInline
+ case _ => false
+ }
+ case _ => false
+ }).map(_._2).toList
+ }
+
+ /**
+ * Returns the callsites that can be inlined. Ensures that the returned inline request graph does
+ * not contain cycles.
+ *
+ * The resulting list is sorted such that the leaves of the inline request graph are on the left.
+ * Once these leaves are inlined, the successive elements will be leaves, etc.
+ */
+ private def collectAndOrderInlineRequests: List[Callsite] = {
+ val requests = selectCallsitesForInlining
+
+ // This map is an index to look up the inlining requests for a method. The value sets are mutable
+ // to allow removing elided requests (to break inlining cycles). The map itself is mutable to
+ // allow efficient building: requests.groupBy would build values as List[Callsite] that need to
+ // be transformed to mutable sets.
+ val inlineRequestsForMethod: mutable.Map[MethodNode, mutable.Set[Callsite]] = mutable.HashMap.empty.withDefaultValue(mutable.HashSet.empty)
+ for (r <- requests) inlineRequestsForMethod.getOrElseUpdate(r.callsiteMethod, mutable.HashSet.empty) += r
+
+ /**
+ * Break cycles in the inline request graph by removing callsites.
+ *
+ * The list `requests` is traversed left-to-right, removing those callsites that are part of a
+ * cycle. Elided callsites are also removed from the `inlineRequestsForMethod` map.
+ */
+ def breakInlineCycles(requests: List[Callsite]): List[Callsite] = {
+ // is there a path of inline requests from start to goal?
+ def isReachable(start: MethodNode, goal: MethodNode): Boolean = {
+ @tailrec def reachableImpl(check: List[MethodNode], visited: Set[MethodNode]): Boolean = check match {
+ case x :: xs =>
+ if (x == goal) true
+ else if (visited(x)) reachableImpl(xs, visited)
+ else {
+ val callees = inlineRequestsForMethod(x).map(_.callee.get.callee)
+ reachableImpl(xs ::: callees.toList, visited + x)
+ }
+
+ case Nil =>
+ false
+ }
+ reachableImpl(List(start), Set.empty)
+ }
+
+ val result = new mutable.ListBuffer[Callsite]()
+ // sort the inline requests to ensure that removing requests is deterministic
+ for (r <- requests.sorted(callsiteOrdering)) {
+ // is there a chain of inlining requests that would inline the callsite method into the callee?
+ if (isReachable(r.callee.get.callee, r.callsiteMethod))
+ inlineRequestsForMethod(r.callsiteMethod) -= r
+ else
+ result += r
+ }
+ result.toList
+ }
+
+ // sort the remaining inline requests such that the leaves appear first, then those requests
+ // that become leaves, etc.
+ def leavesFirst(requests: List[Callsite], visited: Set[Callsite] = Set.empty): List[Callsite] = {
+ if (requests.isEmpty) Nil
+ else {
+ val (leaves, others) = requests.partition(r => {
+ val inlineRequestsForCallee = inlineRequestsForMethod(r.callee.get.callee)
+ inlineRequestsForCallee.forall(visited)
+ })
+ assert(leaves.nonEmpty, requests)
+ leaves ::: leavesFirst(others, visited ++ leaves)
+ }
+ }
+
+ leavesFirst(breakInlineCycles(requests))
+ }
+
+
/**
* Copy and adapt the instructions of a method to a callsite.
*
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala
index 23c8daa046..8c5a31658c 100644
--- a/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala
+++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala
@@ -12,6 +12,7 @@ import scala.tools.asm.Opcodes
import scala.tools.asm.tree.analysis.{Analyzer, BasicInterpreter}
import scala.tools.asm.tree._
import scala.collection.convert.decorateAsScala._
+import scala.tools.nsc.backend.jvm.BTypes.InternalName
import scala.tools.nsc.backend.jvm.opt.BytecodeUtils._
import scala.tools.nsc.settings.ScalaSettings
@@ -48,6 +49,37 @@ import scala.tools.nsc.settings.ScalaSettings
*/
class LocalOpt(settings: ScalaSettings) {
/**
+ * Remove unreachable code from all methods of `classNode`. See of its overload.
+ *
+ * @param classNode The class to optimize
+ * @return `true` if unreachable code was removed from any method
+ */
+ def minimalRemoveUnreachableCode(classNode: ClassNode): Boolean = {
+ classNode.methods.asScala.foldLeft(false) {
+ case (changed, method) => minimalRemoveUnreachableCode(method, classNode.name) || changed
+ }
+ }
+
+ /**
+ * Remove unreachable code from a method.
+ *
+ * This implementation only removes instructions that are unreachable for an ASM analyzer /
+ * interpreter. This ensures that future analyses will not produce `null` frames. The inliner
+ * depends on this property.
+ */
+ def minimalRemoveUnreachableCode(method: MethodNode, ownerClassName: InternalName): Boolean = {
+ if (method.instructions.size == 0) return false // fast path for abstract methods
+
+ val (codeRemoved, _) = removeUnreachableCodeImpl(method, ownerClassName)
+ if (codeRemoved) {
+ // Required for correctness, see comment in class LocalOpt
+ removeEmptyExceptionHandlers(method)
+ removeUnusedLocalVariableNodes(method)()
+ }
+ codeRemoved
+ }
+
+ /**
* Remove unreachable instructions from all (non-abstract) methods and apply various other
* cleanups to the bytecode.
*
@@ -73,7 +105,7 @@ class LocalOpt(settings: ScalaSettings) {
*
* Returns `true` if the bytecode of `method` was changed.
*/
- def methodOptimizations(method: MethodNode, ownerClassName: String): Boolean = {
+ def methodOptimizations(method: MethodNode, ownerClassName: InternalName): Boolean = {
if (method.instructions.size == 0) return false // fast path for abstract methods
// unreachable-code also removes unused local variable nodes and empty exception handlers.
@@ -124,7 +156,7 @@ class LocalOpt(settings: ScalaSettings) {
// (*) Removing stale local variable descriptors is required for correctness of unreachable-code
val localsRemoved =
- if (settings.YoptCompactLocals) compactLocalVariables(method)
+ if (settings.YoptCompactLocals) compactLocalVariables(method) // also removes unused
else if (settings.YoptUnreachableCode) removeUnusedLocalVariableNodes(method)() // (*)
else false
@@ -146,7 +178,7 @@ class LocalOpt(settings: ScalaSettings) {
*
* TODO: rewrite, don't use computeMaxLocalsMaxStack (runs a ClassWriter) / Analyzer. Too slow.
*/
- def removeUnreachableCodeImpl(method: MethodNode, ownerClassName: String): (Boolean, Set[LabelNode]) = {
+ def removeUnreachableCodeImpl(method: MethodNode, ownerClassName: InternalName): (Boolean, Set[LabelNode]) = {
// The data flow analysis requires the maxLocals / maxStack fields of the method to be computed.
computeMaxLocalsMaxStack(method)
val a = new Analyzer(new BasicInterpreter)