summaryrefslogtreecommitdiff
path: root/src/compiler/scala/tools/nsc/backend/jvm/analysis
diff options
context:
space:
mode:
Diffstat (limited to 'src/compiler/scala/tools/nsc/backend/jvm/analysis')
-rw-r--r--src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala556
-rw-r--r--src/compiler/scala/tools/nsc/backend/jvm/analysis/BackendUtils.scala514
-rw-r--r--src/compiler/scala/tools/nsc/backend/jvm/analysis/InstructionStackEffect.scala273
-rw-r--r--src/compiler/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzer.scala191
-rw-r--r--src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzerImpl.scala (renamed from src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzer.scala)80
-rw-r--r--src/compiler/scala/tools/nsc/backend/jvm/analysis/TypeFlowInterpreter.scala36
-rw-r--r--src/compiler/scala/tools/nsc/backend/jvm/analysis/package.scala374
7 files changed, 1683 insertions, 341 deletions
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala
index 7bbe1e2a49..086946e4e3 100644
--- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala
+++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala
@@ -3,17 +3,22 @@ package backend.jvm
package analysis
import scala.annotation.switch
-import scala.collection.{mutable, immutable}
+import scala.collection.mutable
import scala.tools.asm.Opcodes
import scala.tools.asm.tree._
import scala.tools.asm.tree.analysis.{Analyzer, Value, Frame, Interpreter}
import opt.BytecodeUtils._
+import AliasSet.SmallBitSet
-object AliasingFrame {
- private var _idCounter: Long = 0l
- private def nextId = { _idCounter += 1; _idCounter }
-}
-
+/**
+ * A subclass of Frame that tracks aliasing of values stored in local variables and on the stack.
+ *
+ * Note: an analysis tracking aliases is roughly 5x slower than a usual analysis (assuming a simple
+ * value domain with a fast merge function). For example, nullness analysis is roughly 5x slower
+ * than a BasicValue analysis.
+ *
+ * See the doc of package object `analysis` for some notes on the performance of alias analysis.
+ */
class AliasingFrame[V <: Value](nLocals: Int, nStack: Int) extends Frame[V](nLocals, nStack) {
import Opcodes._
@@ -23,63 +28,80 @@ class AliasingFrame[V <: Value](nLocals: Int, nStack: Int) extends Frame[V](nLoc
init(src)
}
- /**
- * For each slot (entry in the `values` array of the frame), an id that uniquely represents
- * the object stored in it. If two values have the same id, they are aliases of the same
- * object.
- */
- private val aliasIds: Array[Long] = Array.fill(nLocals + nStack)(AliasingFrame.nextId)
+ override def toString: String = super.toString + " - " + aliases.toList.filter(s => s != null && s.size > 1).map(_.toString).distinct.mkString(",")
/**
- * The object alias id of for a value index.
- */
- def aliasId(entry: Int) = aliasIds(entry)
-
- /**
- * Returns the indices of the values array which are aliases of the object `id`.
+ * For every value the set of values that are aliases of it.
+ *
+ * Invariants:
+ * - If `aliases(i) == null` then i has no aliases. This is equivalent to having
+ * `aliases(i) == SingletonSet(i)`.
+ * - If `aliases(i) != null` then `aliases(i) contains i`.
+ * - If `aliases(i) contains j` then `aliases(i) eq aliases(j)`, i.e., they are references to the
+ * same (mutable) AliasSet.
*/
- def valuesWithAliasId(id: Long): Set[Int] = immutable.BitSet.empty ++ aliasIds.indices.iterator.filter(i => aliasId(i) == id)
+ val aliases: Array[AliasSet] = new Array[AliasSet](getLocals + getMaxStackSize)
/**
* The set of aliased values for a given entry in the `values` array.
*/
- def aliasesOf(entry: Int): Set[Int] = valuesWithAliasId(aliasIds(entry))
+ def aliasesOf(entry: Int): AliasSet = {
+ if (aliases(entry) != null) aliases(entry)
+ else {
+ val init = new AliasSet(new AliasSet.SmallBitSet(entry, -1, -1, -1), 1)
+ aliases(entry) = init
+ init
+ }
+ }
/**
- * Define a new alias. For example, given
- * var a = this // this, a have the same aliasId
- * then an assignment
+ * Define a new alias. For example, an assignment
* b = a
- * will set the same the aliasId for `b`.
+ * adds b to the set of aliases of a.
*/
private def newAlias(assignee: Int, source: Int): Unit = {
- aliasIds(assignee) = aliasIds(source)
+ removeAlias(assignee)
+ val sourceAliases = aliasesOf(source)
+ sourceAliases += assignee
+ aliases(assignee) = sourceAliases
}
/**
- * An assignment
+ * Remove an alias. For example, an assignment
* a = someUnknownValue()
- * sets a fresh alias id for `a`.
- * A stack value is also removed from its alias set when being consumed.
+ * removes a from its former alias set.
+ * As another example, stack values are removed from their alias sets when being consumed.
*/
private def removeAlias(assignee: Int): Unit = {
- aliasIds(assignee) = AliasingFrame.nextId
+ if (aliases(assignee) != null) {
+ aliases(assignee) -= assignee
+ aliases(assignee) = null
+ }
+ }
+
+ /**
+ * Define the alias set for a given value.
+ */
+ private def setAliasSet(assignee: Int, set: AliasSet): Unit = {
+ if (aliases(assignee) != null) {
+ aliases(assignee) -= assignee
+ }
+ aliases(assignee) = set
}
override def execute(insn: AbstractInsnNode, interpreter: Interpreter[V]): Unit = {
- // Make the extendsion methods easier to use (otherwise we have to repeat `this`.stackTop)
+ // Make the extension methods easier to use (otherwise we have to repeat `this`.stackTop)
def stackTop: Int = this.stackTop
def peekStack(n: Int): V = this.peekStack(n)
- // the val pattern `val (p, c) = f` still allocates a tuple (https://github.com/scala-opt/scala/issues/28)
- val prodCons = InstructionStackEffect(insn, this) // needs to be called before super.execute, see its doc
- val consumed = prodCons._1
- val produced = prodCons._2
+ val prodCons = InstructionStackEffect.forAsmAnalysis(insn, this) // needs to be called before super.execute, see its doc
+ val consumed = InstructionStackEffect.cons(prodCons)
+ val produced = InstructionStackEffect.prod(prodCons)
super.execute(insn, interpreter)
(insn.getOpcode: @switch) match {
- case ALOAD =>
+ case ILOAD | LLOAD | FLOAD | DLOAD | ALOAD =>
newAlias(assignee = stackTop, source = insn.asInstanceOf[VarInsnNode].`var`)
case DUP =>
@@ -166,31 +188,54 @@ class AliasingFrame[V <: Value](nLocals: Int, nStack: Int) extends Frame[V](nLoc
}
case SWAP =>
+ // could be written more elegantly with higher-order combinators, but thinking of performance
val top = stackTop
- val idTop = aliasIds(top)
- aliasIds(top) = aliasIds(top - 1)
- aliasIds(top - 1) = idTop
- case opcode =>
- if (opcode == ASTORE) {
- // Not a separate case because we need to remove the consumed stack value from alias sets after.
- val stackTopBefore = stackTop - produced + consumed
- val local = insn.asInstanceOf[VarInsnNode].`var`
- newAlias(assignee = local, source = stackTopBefore)
- // if the value written is size 2, it overwrites the subsequent slot, which is then no
- // longer an alias of anything. see the corresponding case in `Frame.execute`.
- if (getLocal(local).getSize == 2)
- removeAlias(local + 1)
-
- // if the value at the preceding index is size 2, it is no longer valid, so we remove its
- // aliasing. see corresponding case in `Frame.execute`
- if (local > 0) {
- val precedingValue = getLocal(local - 1)
- if (precedingValue != null && precedingValue.getSize == 2)
- removeAlias(local - 1)
+ def moveNextToTop(): Unit = {
+ val nextAliases = aliases(top - 1)
+ aliases(top) = nextAliases
+ nextAliases -= (top - 1)
+ nextAliases += top
+ }
+
+ if (aliases(top) != null) {
+ val topAliases = aliases(top)
+ if (aliases(top - 1) != null) moveNextToTop()
+ else aliases(top) = null
+ // move top to next
+ aliases(top - 1) = topAliases
+ topAliases -= top
+ topAliases += (top - 1)
+ } else {
+ if (aliases(top - 1) != null) {
+ moveNextToTop()
+ aliases(top - 1) = null
}
}
+ case opcode =>
+ (opcode: @switch) match {
+ case ISTORE | LSTORE | FSTORE | DSTORE | ASTORE =>
+ // not a separate case: we re-use the code below that removes the consumed stack value from alias sets
+ val stackTopBefore = stackTop - produced + consumed
+ val local = insn.asInstanceOf[VarInsnNode].`var`
+ newAlias(assignee = local, source = stackTopBefore)
+ // if the value written is size 2, it overwrites the subsequent slot, which is then no
+ // longer an alias of anything. see the corresponding case in `Frame.execute`.
+ if (getLocal(local).getSize == 2)
+ removeAlias(local + 1)
+
+ // if the value at the preceding index is size 2, it is no longer valid, so we remove its
+ // aliasing. see corresponding case in `Frame.execute`
+ if (local > 0) {
+ val precedingValue = getLocal(local - 1)
+ if (precedingValue != null && precedingValue.getSize == 2)
+ removeAlias(local - 1)
+ }
+
+ case _ =>
+ }
+
// Remove consumed stack values from aliasing sets.
// Example: iadd
// - before: local1, local2, stack1, consumed1, consumed2
@@ -198,10 +243,22 @@ class AliasingFrame[V <: Value](nLocals: Int, nStack: Int) extends Frame[V](nLoc
val firstConsumed = stackTop - produced + 1 // firstConsumed = 3
for (i <- 0 until consumed)
removeAlias(firstConsumed + i) // remove aliases for 3 and 4
+ }
+ }
- // We don't need to set the aliases ids for the produced values: the aliasIds array already
- // contains fresh ids for non-used stack values (ensured by removeAlias).
+ /**
+ * When entering an exception handler, all values are dropped from the stack (and the exception
+ * value is pushed). The ASM analyzer invokes `firstHandlerInstructionFrame.clearStack()`. To
+ * ensure consistent aliasing sets, we need to remove the dropped values from aliasing sets.
+ */
+ override def clearStack(): Unit = {
+ var i = getLocals
+ val end = i + getStackSize
+ while (i < end) {
+ removeAlias(i)
+ i += 1
}
+ super.clearStack()
}
/**
@@ -217,30 +274,131 @@ class AliasingFrame[V <: Value](nLocals: Int, nStack: Int) extends Frame[V](nLoc
* x = a
* y = b // (x, a) and (y, b)
* }
- * [...] // (x, a)
+ * [...] // (x, a) -- merge of ((x, y, a)) and ((x, a), (y, b))
*/
override def merge(other: Frame[_ <: V], interpreter: Interpreter[V]): Boolean = {
+ // merge is the main performance hot spot of a data flow analysis.
+
+ // in nullness analysis, super.merge (which actually merges the nullness values) takes 20% of
+ // the overall analysis time.
val valuesChanged = super.merge(other, interpreter)
+
+ // in nullness analysis, merging the alias sets takes ~55% of the analysis time. therefore, this
+ // code has been heavily optimized. most of the time is spent in the `hasNext` method of the
+ // andNotIterator, see its comment.
+
var aliasesChanged = false
val aliasingOther = other.asInstanceOf[AliasingFrame[_]]
- for (i <- aliasIds.indices) {
- val thisAliases = aliasesOf(i)
- val thisNotOther = thisAliases diff (thisAliases intersect aliasingOther.aliasesOf(i))
- if (thisNotOther.nonEmpty) {
- aliasesChanged = true
- thisNotOther foreach removeAlias
+
+ val numValues = getLocals + getStackSize
+ // assume (a, b) are aliases both in this frame, and the other frame. when merging the alias set
+ // for a, we already see that a and b will be aliases in the final result. so we can skip over
+ // merging the alias set for b. in this case, while merging the sets for a, knownOk(b) will be
+ // set to `true`.
+ val knownOk = new Array[Boolean](numValues)
+ var i = 0
+ while (i < numValues) {
+ if (!knownOk(i)) {
+ val thisAliases = this.aliases(i)
+ val otherAliases = aliasingOther.aliases(i)
+ if (thisAliases != null) {
+ if (otherAliases == null) {
+ if (thisAliases.size > 1) {
+ aliasesChanged = true
+ removeAlias(i)
+ }
+ } else {
+ // The iterator yields elements that are in `thisAliases` but not in `otherAliases`.
+ // As a side-effect, for every index `i` that is in both alias sets, the iterator sets
+ // `knownOk(i) = true`: the alias sets for these values don't need to be merged again.
+ val thisNotOtherIt = AliasSet.andNotIterator(thisAliases, otherAliases, knownOk)
+ if (thisNotOtherIt.hasNext) {
+ aliasesChanged = true
+ val newSet = AliasSet.empty
+ while (thisNotOtherIt.hasNext) {
+ val next = thisNotOtherIt.next()
+ newSet += next
+ setAliasSet(next, newSet)
+ }
+ }
+ }
+ }
}
+ i += 1
}
+
valuesChanged || aliasesChanged
}
+ private def min(s: SmallBitSet) = {
+ var r = s.a
+ if ( s.b < r) r = s.b
+ if (s.c != -1 && s.c < r) r = s.c
+ if (s.d != -1 && s.d < r) r = s.d
+ r
+ }
+
override def init(src: Frame[_ <: V]): Frame[V] = {
- super.init(src)
- compat.Platform.arraycopy(src.asInstanceOf[AliasingFrame[_]].aliasIds, 0, aliasIds, 0, aliasIds.length)
+ super.init(src) // very quick (just an arraycopy)
+ System.arraycopy(src.asInstanceOf[AliasingFrame[_]].aliases, 0, aliases, 0, aliases.length) // also quick
+
+ val newSets = mutable.HashMap.empty[AliasSet, AliasSet]
+
+ // the rest of this method (cloning alias sets) is the second performance˙hotspot (next to
+ // AliasingFrame.merge). for nullness, it takes ~20% of the analysis time.
+ // the difficulty here is that we have to clone the alias sets correctly. if two values a, b are
+ // aliases, then aliases(a) eq aliases(b). we need to make sure to use the same clone for the
+ // two values.
+
+ var i = 0
+ while (i < aliases.length) {
+ val set = aliases(i)
+ if (set != null) {
+ // size cannot be 0 - alias sets are always at least singletons.
+ // for sets of size 1-4, don't use the `newSets` map - lookup / update is slow
+ if (set.size == 1) {
+ aliases(i) = null
+ } else if (set.size <= 4) {
+ val small = set.set.asInstanceOf[AliasSet.SmallBitSet]
+ val firstOfSet = i == min(small)
+ if (firstOfSet) {
+ val newSet = set.clone()
+ aliases(small.a) = newSet
+ aliases(small.b) = newSet
+ if (small.c != -1) aliases(small.c) = newSet
+ if (small.d != -1) aliases(small.d) = newSet
+ }
+ } else {
+ // the actual hot spot is the hash map operations here: this is where almost all of the 20%
+ // mentioned above is spent.
+ // i also benchmarked an alternative implementation: keep an array of booleans for indexes
+ // that already contain the cloned set. iterate through all elements of the cloned set and
+ // assign the cloned set. this approach is 50% slower than using a hash map.
+ if (newSets contains set) aliases(i) = newSets(set)
+ else {
+ val newSet = set.clone()
+ newSets(set) = newSet
+ aliases(i) = newSet
+ }
+ }
+ }
+ i += 1
+ }
this
}
}
+object AliasingFrame {
+// val start1 = AliasingFrame.timer1.start()
+// AliasingFrame.timer1.stop(start1)
+ import scala.reflect.internal.util.Statistics._
+ val timer1 = newTimer("t1", "jvm")
+ val timer2 = newTimer("t2", "jvm")
+ val timer3 = newTimer("t3", "jvm")
+ val timers = List(timer1, timer2, timer3)
+ def reset(): Unit = for (t <- timers) { t.nanos = 0; t.timings = 0 }
+}
+
/**
* An analyzer that uses AliasingFrames instead of bare Frames. This can be used when an analysis
* needs to track aliases, but doesn't require a more specific Frame subclass.
@@ -249,3 +407,269 @@ class AliasingAnalyzer[V <: Value](interpreter: Interpreter[V]) extends Analyzer
override def newFrame(nLocals: Int, nStack: Int): AliasingFrame[V] = new AliasingFrame(nLocals, nStack)
override def newFrame(src: Frame[_ <: V]): AliasingFrame[V] = new AliasingFrame(src)
}
+
+/**
+ * An iterator over Int (required to prevent boxing the result of next).
+ */
+abstract class IntIterator extends Iterator[Int] {
+ def hasNext: Boolean
+ def next(): Int
+}
+
+/**
+ * An efficient mutable bit set.
+ *
+ * @param set Either a SmallBitSet or an Array[Long]
+ * @param size The size of the set, useful for performance of certain operations
+ */
+class AliasSet(var set: Object /*SmallBitSet | Array[Long]*/, var size: Int) {
+ import AliasSet._
+
+ override def toString: String = iterator.toSet.mkString("<", ",", ">")
+
+ /**
+ * An iterator for the elements of this bit set. Note that only one iterator can be used at a
+ * time. Also make sure not to change the underlying AliasSet during iteration.
+ */
+ def iterator: IntIterator = andNotIterator(this, empty, null)
+
+ def +=(value: Int): Unit = this.set match {
+ case s: SmallBitSet => (size: @switch) match {
+ case 0 => s.a = value; size = 1
+ case 1 => if (value != s.a) { s.b = value; size = 2 }
+ case 2 => if (value != s.a && value != s.b) { s.c = value; size = 3 }
+ case 3 => if (value != s.a && value != s.b && value != s.c) { s.d = value; size = 4 }
+ case 4 =>
+ if (value != s.a && value != s.b && value != s.c && value != s.d) {
+ this.set = bsEmpty
+ this.size = 0
+ bsAdd(this, s.a)
+ bsAdd(this, s.b)
+ bsAdd(this, s.c)
+ bsAdd(this, s.d)
+ bsAdd(this, value)
+ }
+ }
+ case bits: Array[Long] =>
+ bsAdd(this, value)
+ }
+
+ def -=(value: Int): Unit = this.set match {
+ case s: SmallBitSet => (size: @switch) match {
+ case 0 =>
+ case 1 =>
+ if (value == s.a) { s.a = -1; size = 0 }
+ case 2 =>
+ if (value == s.a) { s.a = s.b; s.b = -1; size = 1 }
+ else if (value == s.b) { s.b = -1; size = 1 }
+ case 3 =>
+ if (value == s.a) { s.a = s.b; s.b = s.c; s.c = -1; size = 2 }
+ else if (value == s.b) { s.b = s.c; s.c = -1; size = 2 }
+ else if (value == s.c) { s.c = -1; size = 2 }
+ case 4 =>
+ if (value == s.a) { s.a = s.b; s.b = s.c; s.c = s.d; s.d = -1; size = 3 }
+ else if (value == s.b) { s.b = s.c; s.c = s.d; s.d = -1; size = 3 }
+ else if (value == s.c) { s.c = s.d; s.d = -1; size = 3 }
+ else if (value == s.d) { s.d = -1; size = 3 }
+ }
+ case bits: Array[Long] =>
+ bsRemove(this, value)
+ if (this.size == 4)
+ this.set = bsToSmall(this.set.asInstanceOf[Array[Long]])
+ }
+
+ override def clone(): AliasSet = {
+ val resSet = this.set match {
+ case s: SmallBitSet => new SmallBitSet(s.a, s.b, s.c, s.d)
+ case bits: Array[Long] => bits.clone()
+ }
+ new AliasSet(resSet, this.size)
+ }
+}
+
+object AliasSet {
+ def empty = new AliasSet(new SmallBitSet(-1, -1, -1, -1), 0)
+
+ final class SmallBitSet(var a: Int, var b: Int, var c: Int, var d: Int) {
+ override def toString = s"($a, $b, $c, $d)"
+ }
+
+ def bsEmpty: Array[Long] = new Array[Long](1)
+
+ private def bsEnsureCapacity(set: Array[Long], index: Int): Array[Long] = {
+ if (index < set.length) set
+ else {
+ var newLength = set.length
+ while (index >= newLength) newLength *= 2
+ val newSet = new Array[Long](newLength)
+ Array.copy(set, 0, newSet, 0, set.length)
+ newSet
+ }
+ }
+
+ def bsAdd(set: AliasSet, bit: Int): Unit = {
+ val bits = set.set.asInstanceOf[Array[Long]]
+ val index = bit >> 6
+ val resSet = bsEnsureCapacity(bits, index)
+ val before = resSet(index)
+ val result = before | (1l << bit)
+ if (result != before) {
+ resSet(index) = result
+ set.set = resSet
+ set.size += 1
+ }
+ }
+
+ def bsRemove(set: AliasSet, bit: Int): Unit = {
+ val bits = set.set.asInstanceOf[Array[Long]]
+ val index = bit >> 6
+ if (index < bits.length) {
+ val before = bits(index)
+ val result = before & ~(1l << bit)
+ if (result != before) {
+ bits(index) = result
+ set.size -= 1
+ }
+ }
+ }
+
+ def bsContains(set: Array[Long], bit: Int): Boolean = {
+ val index = bit >> 6
+ bit >= 0 && index < set.length && (set(index) & (1L << bit)) != 0L
+ }
+
+// var sizesHist: Array[Int] = new Array[Int](1000)
+
+ /**
+ * Convert a bit array to a SmallBitSet. Requires the bit array to contain exactly four bits.
+ */
+ def bsToSmall(bits: Array[Long]): SmallBitSet = {
+ var a = -1
+ var b = -1
+ var c = -1
+ var i = 0
+ val end = bits.length * 64
+ while (i < end) {
+ if (bsContains(bits, i)) {
+ if (a == -1) a = i
+ else if (b == -1) b = i
+ else if (c == -1) c = i
+ else return new SmallBitSet(a, b, c, i)
+ }
+ i += 1
+ }
+ null
+ }
+
+ /**
+ * An iterator that yields the elements that are in one bit set and not in another (&~).
+ */
+ private class AndNotIt(setA: AliasSet, setB: AliasSet, thisAndOther: Array[Boolean]) extends IntIterator {
+ // values in the first bit set
+ private var a, b, c, d = -1
+ private var xs: Array[Long] = null
+
+ // values in the second bit set
+ private var notA, notB, notC, notD = -1
+ private var notXs: Array[Long] = null
+
+ // holds the next value of `x`, `y` or `z` that should be returned. assigned in hasNext
+ private var abcdNext = -1
+
+ // counts through elements in the `xs` bit set
+ private var i = 0
+ // true if the current value of `i` should be returned by this iterator
+ private var iValid = false
+
+ setA.set match {
+ case s: SmallBitSet => a = s.a; b = s.b; c = s.c; d = s.d
+ case bits: Array[Long] => xs = bits
+ }
+
+ setB.set match {
+ case s: SmallBitSet => notA = s.a; notB = s.b; notC = s.c; notD = s.d
+ case bits: Array[Long] => notXs = bits
+ }
+
+ // for each value that exists both in this AND (&) the other bit, `thisAndOther` is set to true.
+ // hacky side-effect, used for performance of AliasingFrame.merge.
+ private def setThisAndOther(x: Int) = if (thisAndOther != null) thisAndOther(x) = true
+
+ private def checkABCD(x: Int, num: Int): Boolean = {
+ // assert(x == a && num == 1 || x == b && num == 2 || ...)
+ x != -1 && {
+ val otherHasA = x == notA || x == notB || x == notC || x == notD || (notXs != null && bsContains(notXs, x))
+ if (otherHasA) setThisAndOther(x)
+ else abcdNext = x
+ (num: @switch) match {
+ case 1 => a = -1
+ case 2 => b = -1
+ case 3 => c = -1
+ case 4 => d = -1
+ }
+ !otherHasA
+ }
+ }
+
+ // main performance hot spot
+ private def checkXs = {
+ (xs != null) && {
+ val end = xs.length * 64
+
+ while (i < end && {
+ val index = i >> 6
+ if (xs(index) == 0l) { // boom. for nullness, this saves 35% of the overall analysis time.
+ i = ((index + 1) << 6) - 1 // -1 required because i is incremented in the loop body
+ true
+ } else {
+ val mask = 1l << i
+ // if (mask > xs(index)) we could also advance i to the next value, but that didn't pay off in benchmarks
+ val thisHasI = (xs(index) & mask) != 0l
+ !thisHasI || {
+ val otherHasI = i == notA || i == notB || i == notC || i == notD || (notXs != null && index < notXs.length && (notXs(index) & mask) != 0l)
+ if (otherHasI) setThisAndOther(i)
+ otherHasI
+ }
+ }
+ }) i += 1
+
+ iValid = i < end
+ iValid
+ }
+ }
+
+ // this is the main hot spot of alias analysis. for nullness, 38% of the overall analysis time
+ // is spent here. within hasNext, almost the entire time is spent in `checkXs`.
+ //
+ def hasNext: Boolean = iValid || abcdNext != -1 || checkABCD(a, 1) || checkABCD(b, 2) || checkABCD(c, 3) || checkABCD(d, 4) || checkXs
+
+ def next(): Int = {
+ if (hasNext) {
+ if (abcdNext != -1) {
+ val r = abcdNext; abcdNext = -1; r
+ } else {
+ val r = i; i += 1; iValid = false; r
+ }
+ } else Iterator.empty.next()
+ }
+ }
+
+// The number of bits in a bit array. Useful for debugging.
+// def bsSize(bits: Array[Long]) = {
+// var r = 0
+// var i = 0
+// while (i < bits.length) {
+// r += java.lang.Long.bitCount(bits(i))
+// i += 1
+// }
+// r
+// }
+
+ /**
+ * An iterator returning the elements in a that are not also in b (a &~ b).
+ *
+ * If `thisAndOther` is non-null, the iterator sets thisAndOther(i) to true for every value that
+ * is both in a and b (&).
+ */
+ def andNotIterator(a: AliasSet, b: AliasSet, thisAndOther: Array[Boolean]): IntIterator = new AndNotIt(a, b, thisAndOther)
+}
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/BackendUtils.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/BackendUtils.scala
new file mode 100644
index 0000000000..90da570f01
--- /dev/null
+++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/BackendUtils.scala
@@ -0,0 +1,514 @@
+package scala.tools.nsc
+package backend.jvm
+package analysis
+
+import java.lang.invoke.LambdaMetafactory
+
+import scala.annotation.switch
+import scala.collection.JavaConverters._
+import scala.collection.mutable
+import scala.tools.asm.Opcodes._
+import scala.tools.asm.tree._
+import scala.tools.asm.tree.analysis._
+import scala.tools.asm.{Handle, Type}
+import scala.tools.nsc.backend.jvm.BTypes._
+import scala.tools.nsc.backend.jvm.GenBCode._
+import scala.tools.nsc.backend.jvm.opt.BytecodeUtils._
+
+/**
+ * This component hosts tools and utilities used in the backend that require access to a `BTypes`
+ * instance.
+ *
+ * One example is the AsmAnalyzer class, which runs `computeMaxLocalsMaxStack` on the methodNode to
+ * be analyzed. This method in turn lives inside the BTypes assembly because it queries the per-run
+ * cache `maxLocalsMaxStackComputed` defined in there.
+ */
+class BackendUtils[BT <: BTypes](val btypes: BT) {
+ import btypes._
+ import btypes.coreBTypes._
+ import callGraph.ClosureInstantiation
+
+ /**
+ * A wrapper to make ASM's Analyzer a bit easier to use.
+ */
+ class AsmAnalyzer[V <: Value](methodNode: MethodNode, classInternalName: InternalName, val analyzer: Analyzer[V] = new Analyzer(new BasicInterpreter)) {
+ computeMaxLocalsMaxStack(methodNode)
+ try {
+ analyzer.analyze(classInternalName, methodNode)
+ } catch {
+ case ae: AnalyzerException =>
+ throw new AnalyzerException(null, "While processing " + classInternalName + "." + methodNode.name, ae)
+ }
+ def frameAt(instruction: AbstractInsnNode): Frame[V] = analyzer.frameAt(instruction, methodNode)
+ }
+
+ /**
+ * See the doc comment on package object `analysis` for a discussion on performance.
+ */
+ object AsmAnalyzer {
+ // jvm limit is 65535 for both number of instructions and number of locals
+
+ private def size(method: MethodNode) = method.instructions.size.toLong * method.maxLocals * method.maxLocals
+
+ // with the limits below, analysis should not take more than one second
+
+ private val nullnessSizeLimit = 5000l * 600l * 600l // 5000 insns, 600 locals
+ private val basicValueSizeLimit = 9000l * 1000l * 1000l
+ private val sourceValueSizeLimit = 8000l * 950l * 950l
+
+ def sizeOKForAliasing(method: MethodNode): Boolean = size(method) < nullnessSizeLimit
+ def sizeOKForNullness(method: MethodNode): Boolean = size(method) < nullnessSizeLimit
+ def sizeOKForBasicValue(method: MethodNode): Boolean = size(method) < basicValueSizeLimit
+ def sizeOKForSourceValue(method: MethodNode): Boolean = size(method) < sourceValueSizeLimit
+ }
+
+ class ProdConsAnalyzer(val methodNode: MethodNode, classInternalName: InternalName) extends AsmAnalyzer(methodNode, classInternalName, new Analyzer(new InitialProducerSourceInterpreter)) with ProdConsAnalyzerImpl
+
+ class NonLubbingTypeFlowAnalyzer(val methodNode: MethodNode, classInternalName: InternalName) extends AsmAnalyzer(methodNode, classInternalName, new Analyzer(new NonLubbingTypeFlowInterpreter))
+
+ /**
+ * Add:
+ * private static Object $deserializeLambda$(SerializedLambda l) {
+ * return indy[scala.runtime.LambdaDeserialize.bootstrap](l)
+ * }
+ *
+ * We use invokedynamic here to enable caching within the deserializer without needing to
+ * host a static field in the enclosing class. This allows us to add this method to interfaces
+ * that define lambdas in default methods.
+ */
+ def addLambdaDeserialize(classNode: ClassNode, implMethods: Iterable[Handle]): Unit = {
+ val cw = classNode
+
+ // Make sure to reference the ClassBTypes of all types that are used in the code generated
+ // here (e.g. java/util/Map) are initialized. Initializing a ClassBType adds it to the
+ // `classBTypeFromInternalName` map. When writing the classfile, the asm ClassWriter computes
+ // stack map frames and invokes the `getCommonSuperClass` method. This method expects all
+ // ClassBTypes mentioned in the source code to exist in the map.
+
+ val nilLookupDesc = MethodBType(Nil, jliMethodHandlesLookupRef).descriptor
+ val serlamObjDesc = MethodBType(jliSerializedLambdaRef :: Nil, ObjectRef).descriptor
+
+ {
+ val mv = cw.visitMethod(ACC_PRIVATE + ACC_STATIC + ACC_SYNTHETIC, "$deserializeLambda$", serlamObjDesc, null, null)
+ mv.visitCode()
+ mv.visitVarInsn(ALOAD, 0)
+ mv.visitInvokeDynamicInsn("lambdaDeserialize", serlamObjDesc, lambdaDeserializeBootstrapHandle, implMethods.toArray: _*)
+ mv.visitInsn(ARETURN)
+ mv.visitEnd()
+ }
+ }
+
+ /**
+ * Clone the instructions in `methodNode` into a new [[InsnList]], mapping labels according to
+ * the `labelMap`. Returns the new instruction list and a map from old to new instructions, and
+ * a list of lambda implementation methods references by invokedynamic[LambdaMetafactory] for a
+ * serializable SAM types.
+ */
+ def cloneInstructions(methodNode: MethodNode, labelMap: Map[LabelNode, LabelNode], keepLineNumbers: Boolean): (InsnList, Map[AbstractInsnNode, AbstractInsnNode], List[Handle]) = {
+ val javaLabelMap = labelMap.asJava
+ val result = new InsnList
+ var map = Map.empty[AbstractInsnNode, AbstractInsnNode]
+ var inlinedTargetHandles = mutable.ListBuffer[Handle]()
+ for (ins <- methodNode.instructions.iterator.asScala) {
+ ins match {
+ case callGraph.LambdaMetaFactoryCall(indy, _, _, _) => indy.bsmArgs match {
+ case Array(_, targetHandle: Handle, _, flags: Integer, xs@_*) if (flags.intValue & LambdaMetafactory.FLAG_SERIALIZABLE) != 0 =>
+ inlinedTargetHandles += targetHandle
+ case _ =>
+ }
+ case _ =>
+ }
+ if (keepLineNumbers || !ins.isInstanceOf[LineNumberNode]) {
+ val cloned = ins.clone(javaLabelMap)
+ result add cloned
+ map += ((ins, cloned))
+ }
+ }
+ (result, map, inlinedTargetHandles.toList)
+ }
+
+ def getBoxedUnit: FieldInsnNode = new FieldInsnNode(GETSTATIC, srBoxedUnitRef.internalName, "UNIT", srBoxedUnitRef.descriptor)
+
+ private val anonfunAdaptedName = """.*\$anonfun\$.*\$\d+\$adapted""".r
+ def hasAdaptedImplMethod(closureInit: ClosureInstantiation): Boolean = {
+ anonfunAdaptedName.pattern.matcher(closureInit.lambdaMetaFactoryCall.implMethod.getName).matches
+ }
+
+ private def primitiveAsmTypeToBType(primitiveType: Type): PrimitiveBType = (primitiveType.getSort: @switch) match {
+ case Type.BOOLEAN => BOOL
+ case Type.BYTE => BYTE
+ case Type.CHAR => CHAR
+ case Type.SHORT => SHORT
+ case Type.INT => INT
+ case Type.LONG => LONG
+ case Type.FLOAT => FLOAT
+ case Type.DOUBLE => DOUBLE
+ case _ => null
+ }
+
+ def isScalaBox(insn: MethodInsnNode): Boolean = {
+ insn.owner == srBoxesRunTimeRef.internalName && {
+ val args = Type.getArgumentTypes(insn.desc)
+ args.length == 1 && (srBoxesRuntimeBoxToMethods.get(primitiveAsmTypeToBType(args(0))) match {
+ case Some(MethodNameAndType(name, tp)) => name == insn.name && tp.descriptor == insn.desc
+ case _ => false
+ })
+ }
+ }
+
+ def getScalaBox(primitiveType: Type): MethodInsnNode = {
+ val bType = primitiveAsmTypeToBType(primitiveType)
+ val MethodNameAndType(name, methodBType) = srBoxesRuntimeBoxToMethods(bType)
+ new MethodInsnNode(INVOKESTATIC, srBoxesRunTimeRef.internalName, name, methodBType.descriptor, /*itf =*/ false)
+ }
+
+ def isScalaUnbox(insn: MethodInsnNode): Boolean = {
+ insn.owner == srBoxesRunTimeRef.internalName && (srBoxesRuntimeUnboxToMethods.get(primitiveAsmTypeToBType(Type.getReturnType(insn.desc))) match {
+ case Some(MethodNameAndType(name, tp)) => name == insn.name && tp.descriptor == insn.desc
+ case _ => false
+ })
+ }
+
+ def getScalaUnbox(primitiveType: Type): MethodInsnNode = {
+ val bType = primitiveAsmTypeToBType(primitiveType)
+ val MethodNameAndType(name, methodBType) = srBoxesRuntimeUnboxToMethods(bType)
+ new MethodInsnNode(INVOKESTATIC, srBoxesRunTimeRef.internalName, name, methodBType.descriptor, /*itf =*/ false)
+ }
+
+ private def calleeInMap(insn: MethodInsnNode, map: Map[InternalName, MethodNameAndType]): Boolean = map.get(insn.owner) match {
+ case Some(MethodNameAndType(name, tp)) => insn.name == name && insn.desc == tp.descriptor
+ case _ => false
+ }
+
+ def isJavaBox(insn: MethodInsnNode): Boolean = calleeInMap(insn, javaBoxMethods)
+ def isJavaUnbox(insn: MethodInsnNode): Boolean = calleeInMap(insn, javaUnboxMethods)
+
+ def isPredefAutoBox(insn: MethodInsnNode): Boolean = {
+ insn.owner == PredefRef.internalName && (predefAutoBoxMethods.get(insn.name) match {
+ case Some(tp) => insn.desc == tp.descriptor
+ case _ => false
+ })
+ }
+
+ def isPredefAutoUnbox(insn: MethodInsnNode): Boolean = {
+ insn.owner == PredefRef.internalName && (predefAutoUnboxMethods.get(insn.name) match {
+ case Some(tp) => insn.desc == tp.descriptor
+ case _ => false
+ })
+ }
+
+ def isRefCreate(insn: MethodInsnNode): Boolean = calleeInMap(insn, srRefCreateMethods)
+ def isRefZero(insn: MethodInsnNode): Boolean = calleeInMap(insn, srRefZeroMethods)
+
+ def runtimeRefClassBoxedType(refClass: InternalName): Type = Type.getArgumentTypes(srRefCreateMethods(refClass).methodType.descriptor)(0)
+
+ def isSideEffectFreeCall(insn: MethodInsnNode): Boolean = {
+ isScalaBox(insn) || isScalaUnbox(insn) ||
+ isJavaBox(insn) || // not java unbox, it may NPE
+ isSideEffectFreeConstructorCall(insn)
+ }
+
+ def isNonNullMethodInvocation(mi: MethodInsnNode): Boolean = {
+ isJavaBox(mi) || isScalaBox(mi) || isPredefAutoBox(mi) || isRefCreate(mi) || isRefZero(mi)
+ }
+
+ def isModuleLoad(insn: AbstractInsnNode, moduleName: InternalName): Boolean = insn match {
+ case fi: FieldInsnNode => fi.getOpcode == GETSTATIC && fi.owner == moduleName && fi.name == "MODULE$" && fi.desc == ("L" + moduleName + ";")
+ case _ => false
+ }
+
+ def isPredefLoad(insn: AbstractInsnNode) = isModuleLoad(insn, PredefRef.internalName)
+
+ def isPrimitiveBoxConstructor(insn: MethodInsnNode): Boolean = calleeInMap(insn, primitiveBoxConstructors)
+ def isRuntimeRefConstructor(insn: MethodInsnNode): Boolean = calleeInMap(insn, srRefConstructors)
+ def isTupleConstructor(insn: MethodInsnNode): Boolean = calleeInMap(insn, tupleClassConstructors)
+
+ // unused objects created by these constructors are eliminated by pushPop
+ private lazy val sideEffectFreeConstructors: Set[(String, String)] = {
+ val ownerDesc = (p: (InternalName, MethodNameAndType)) => (p._1, p._2.methodType.descriptor)
+ primitiveBoxConstructors.map(ownerDesc).toSet ++
+ srRefConstructors.map(ownerDesc) ++
+ tupleClassConstructors.map(ownerDesc) ++ Set(
+ (ObjectRef.internalName, MethodBType(Nil, UNIT).descriptor),
+ (StringRef.internalName, MethodBType(Nil, UNIT).descriptor),
+ (StringRef.internalName, MethodBType(List(StringRef), UNIT).descriptor),
+ (StringRef.internalName, MethodBType(List(ArrayBType(CHAR)), UNIT).descriptor))
+ }
+
+ def isSideEffectFreeConstructorCall(insn: MethodInsnNode): Boolean = {
+ insn.name == INSTANCE_CONSTRUCTOR_NAME && sideEffectFreeConstructors((insn.owner, insn.desc))
+ }
+
+ private lazy val classesOfSideEffectFreeConstructors = sideEffectFreeConstructors.map(_._1)
+
+ def isNewForSideEffectFreeConstructor(insn: AbstractInsnNode) = {
+ insn.getOpcode == NEW && {
+ val ti = insn.asInstanceOf[TypeInsnNode]
+ classesOfSideEffectFreeConstructors.contains(ti.desc)
+ }
+ }
+
+ def isBoxedUnit(insn: AbstractInsnNode) = {
+ insn.getOpcode == GETSTATIC && {
+ val fi = insn.asInstanceOf[FieldInsnNode]
+ fi.owner == srBoxedUnitRef.internalName && fi.name == "UNIT" && fi.desc == srBoxedUnitRef.descriptor
+ }
+ }
+
+ /**
+ * Visit the class node and collect all referenced nested classes.
+ */
+ def collectNestedClasses(classNode: ClassNode): List[ClassBType] = {
+ val innerClasses = mutable.Set.empty[ClassBType]
+
+ def visitInternalName(internalName: InternalName): Unit = if (internalName != null) {
+ val t = classBTypeFromParsedClassfile(internalName)
+ if (t.isNestedClass.get) innerClasses += t
+ }
+
+ // either an internal/Name or [[Linternal/Name; -- there are certain references in classfiles
+ // that are either an internal name (without the surrounding `L;`) or an array descriptor
+ // `[Linternal/Name;`.
+ def visitInternalNameOrArrayReference(ref: String): Unit = if (ref != null) {
+ val bracket = ref.lastIndexOf('[')
+ if (bracket == -1) visitInternalName(ref)
+ else if (ref.charAt(bracket + 1) == 'L') visitInternalName(ref.substring(bracket + 2, ref.length - 1))
+ }
+
+ // we are only interested in the class references in the descriptor, so we can skip over
+ // primitives and the brackets of array descriptors
+ def visitDescriptor(desc: String): Unit = (desc.charAt(0): @switch) match {
+ case '(' =>
+ val internalNames = mutable.ListBuffer.empty[String]
+ var i = 1
+ while (i < desc.length) {
+ if (desc.charAt(i) == 'L') {
+ val start = i + 1 // skip the L
+ while (desc.charAt(i) != ';') i += 1
+ internalNames append desc.substring(start, i)
+ }
+ // skips over '[', ')', primitives
+ i += 1
+ }
+ internalNames foreach visitInternalName
+
+ case 'L' =>
+ visitInternalName(desc.substring(1, desc.length - 1))
+
+ case '[' =>
+ visitInternalNameOrArrayReference(desc)
+
+ case _ => // skip over primitive types
+ }
+
+ def visitConstant(const: AnyRef): Unit = const match {
+ case t: Type => visitDescriptor(t.getDescriptor)
+ case _ =>
+ }
+
+ // in principle we could references to annotation types, as they only end up as strings in the
+ // constant pool, not as class references. however, the java compiler still includes nested
+ // annotation classes in the innerClass table, so we do the same. explained in detail in the
+ // large comment in class BTypes.
+ def visitAnnotation(annot: AnnotationNode): Unit = {
+ visitDescriptor(annot.desc)
+ if (annot.values != null) annot.values.asScala foreach visitConstant
+ }
+
+ def visitAnnotations(annots: java.util.List[_ <: AnnotationNode]) = if (annots != null) annots.asScala foreach visitAnnotation
+ def visitAnnotationss(annotss: Array[java.util.List[AnnotationNode]]) = if (annotss != null) annotss foreach visitAnnotations
+
+ def visitHandle(handle: Handle): Unit = {
+ visitInternalNameOrArrayReference(handle.getOwner)
+ visitDescriptor(handle.getDesc)
+ }
+
+ visitInternalName(classNode.name)
+ innerClasses ++= classBTypeFromParsedClassfile(classNode.name).info.get.nestedClasses
+
+ visitInternalName(classNode.superName)
+ classNode.interfaces.asScala foreach visitInternalName
+ visitInternalName(classNode.outerClass)
+
+ visitAnnotations(classNode.visibleAnnotations)
+ visitAnnotations(classNode.visibleTypeAnnotations)
+ visitAnnotations(classNode.invisibleAnnotations)
+ visitAnnotations(classNode.invisibleTypeAnnotations)
+
+ for (f <- classNode.fields.asScala) {
+ visitDescriptor(f.desc)
+ visitAnnotations(f.visibleAnnotations)
+ visitAnnotations(f.visibleTypeAnnotations)
+ visitAnnotations(f.invisibleAnnotations)
+ visitAnnotations(f.invisibleTypeAnnotations)
+ }
+
+ for (m <- classNode.methods.asScala) {
+ visitDescriptor(m.desc)
+
+ visitAnnotations(m.visibleAnnotations)
+ visitAnnotations(m.visibleTypeAnnotations)
+ visitAnnotations(m.invisibleAnnotations)
+ visitAnnotations(m.invisibleTypeAnnotations)
+ visitAnnotationss(m.visibleParameterAnnotations)
+ visitAnnotationss(m.invisibleParameterAnnotations)
+ visitAnnotations(m.visibleLocalVariableAnnotations)
+ visitAnnotations(m.invisibleLocalVariableAnnotations)
+
+ m.exceptions.asScala foreach visitInternalName
+ for (tcb <- m.tryCatchBlocks.asScala) visitInternalName(tcb.`type`)
+
+ val iter = m.instructions.iterator()
+ while (iter.hasNext) iter.next() match {
+ case ti: TypeInsnNode => visitInternalNameOrArrayReference(ti.desc)
+ case fi: FieldInsnNode => visitInternalNameOrArrayReference(fi.owner); visitDescriptor(fi.desc)
+ case mi: MethodInsnNode => visitInternalNameOrArrayReference(mi.owner); visitDescriptor(mi.desc)
+ case id: InvokeDynamicInsnNode => visitDescriptor(id.desc); visitHandle(id.bsm); id.bsmArgs foreach visitConstant
+ case ci: LdcInsnNode => visitConstant(ci.cst)
+ case ma: MultiANewArrayInsnNode => visitDescriptor(ma.desc)
+ case _ =>
+ }
+ }
+ innerClasses.toList
+ }
+
+ /**
+ * In order to run an Analyzer, the maxLocals / maxStack fields need to be available. The ASM
+ * framework only computes these values during bytecode generation.
+ *
+ * NOTE 1: as explained in the `analysis` package object, the maxStack value used by the Analyzer
+ * may be smaller than the correct maxStack value in the classfile (Analyzers only use a single
+ * slot for long / double values). The maxStack computed here are correct for running an analyzer,
+ * but not for writing in the classfile. We let the ClassWriter recompute max's.
+ *
+ * NOTE 2: the maxStack value computed here may be larger than the smallest correct value
+ * that would allow running an analyzer, see `InstructionStackEffect.forAsmAnalysis` and
+ * `InstructionStackEffect.maxStackGrowth`.
+ *
+ * NOTE 3: the implementation doesn't look at instructions that cannot be reached, it computes
+ * the max local / stack size in the reachable code. These max's work just fine for running an
+ * Analyzer: its implementation also skips over unreachable code in the same way.
+ */
+ def computeMaxLocalsMaxStack(method: MethodNode): Unit = {
+ if (isAbstractMethod(method) || isNativeMethod(method)) {
+ method.maxLocals = 0
+ method.maxStack = 0
+ } else if (!maxLocalsMaxStackComputed(method)) {
+ val size = method.instructions.size
+
+ var maxLocals = parametersSize(method)
+ var maxStack = 0
+
+ // queue of instruction indices where analysis should start
+ var queue = new Array[Int](8)
+ var top = -1
+ def enq(i: Int): Unit = {
+ if (top == queue.length - 1) {
+ val nq = new Array[Int](queue.length * 2)
+ Array.copy(queue, 0, nq, 0, queue.length)
+ queue = nq
+ }
+ top += 1
+ queue(top) = i
+ }
+ def deq(): Int = {
+ val r = queue(top)
+ top -= 1
+ r
+ }
+
+ val subroutineRetTargets = new mutable.Stack[AbstractInsnNode]
+
+ // for each instruction in the queue, contains the stack height at this instruction.
+ // once an instruction has been treated, contains -1 to prevent re-enqueuing
+ val stackHeights = new Array[Int](size)
+
+ def enqInsn(insn: AbstractInsnNode, height: Int): Unit = {
+ enqInsnIndex(method.instructions.indexOf(insn), height)
+ }
+
+ def enqInsnIndex(insnIndex: Int, height: Int): Unit = {
+ if (insnIndex < size && stackHeights(insnIndex) != -1) {
+ stackHeights(insnIndex) = height
+ enq(insnIndex)
+ }
+ }
+
+ val tcbIt = method.tryCatchBlocks.iterator()
+ while (tcbIt.hasNext) {
+ val tcb = tcbIt.next()
+ enqInsn(tcb.handler, 1)
+ if (maxStack == 0) maxStack = 1
+ }
+
+ enq(0)
+ while (top != -1) {
+ val insnIndex = deq()
+ val insn = method.instructions.get(insnIndex)
+ val initHeight = stackHeights(insnIndex)
+ stackHeights(insnIndex) = -1 // prevent i from being enqueued again
+
+ if (insn.getOpcode == -1) { // frames, labels, line numbers
+ enqInsnIndex(insnIndex + 1, initHeight)
+ } else {
+ val stackGrowth = InstructionStackEffect.maxStackGrowth(insn)
+ val heightAfter = initHeight + stackGrowth
+ if (heightAfter > maxStack) maxStack = heightAfter
+
+ // update maxLocals
+ insn match {
+ case v: VarInsnNode =>
+ val longSize = if (isSize2LoadOrStore(v.getOpcode)) 1 else 0
+ maxLocals = math.max(maxLocals, v.`var` + longSize + 1) // + 1 because local numbers are 0-based
+
+ case i: IincInsnNode =>
+ maxLocals = math.max(maxLocals, i.`var` + 1)
+
+ case _ =>
+ }
+
+ insn match {
+ case j: JumpInsnNode =>
+ if (j.getOpcode == JSR) {
+ val jsrTargetHeight = heightAfter + 1
+ if (jsrTargetHeight > maxStack) maxStack = jsrTargetHeight
+ subroutineRetTargets.push(j.getNext)
+ enqInsn(j.label, jsrTargetHeight)
+ } else {
+ enqInsn(j.label, heightAfter)
+ val opc = j.getOpcode
+ if (opc != GOTO) enqInsnIndex(insnIndex + 1, heightAfter) // jump is conditional, so the successor is also a possible control flow target
+ }
+
+ case l: LookupSwitchInsnNode =>
+ var j = 0
+ while (j < l.labels.size) {
+ enqInsn(l.labels.get(j), heightAfter); j += 1
+ }
+ enqInsn(l.dflt, heightAfter)
+
+ case t: TableSwitchInsnNode =>
+ var j = 0
+ while (j < t.labels.size) {
+ enqInsn(t.labels.get(j), heightAfter); j += 1
+ }
+ enqInsn(t.dflt, heightAfter)
+
+ case r: VarInsnNode if r.getOpcode == RET =>
+ enqInsn(subroutineRetTargets.pop(), heightAfter)
+
+ case _ =>
+ val opc = insn.getOpcode
+ if (opc != ATHROW && !isReturn(insn))
+ enqInsnIndex(insnIndex + 1, heightAfter)
+ }
+ }
+ }
+
+ method.maxLocals = maxLocals
+ method.maxStack = maxStack
+
+ maxLocalsMaxStackComputed += method
+ }
+ }
+}
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/InstructionStackEffect.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/InstructionStackEffect.scala
index 8d8ea839e6..dd19ad594f 100644
--- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/InstructionStackEffect.scala
+++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/InstructionStackEffect.scala
@@ -5,35 +5,74 @@ package analysis
import scala.annotation.switch
import scala.tools.asm.Opcodes._
import scala.tools.asm.Type
-import scala.tools.asm.tree.{MultiANewArrayInsnNode, InvokeDynamicInsnNode, MethodInsnNode, AbstractInsnNode}
+import scala.tools.asm.tree._
import scala.tools.asm.tree.analysis.{Frame, Value}
import opt.BytecodeUtils._
-import collection.immutable
object InstructionStackEffect {
- private var cache: immutable.IntMap[(Int, Int)] = immutable.IntMap.empty
- private def t(x: Int, y: Int): (Int, Int) = {
- // x can go up to 255 (number of parameters of a method, dimensions in multianewarray) we cache
- // x up to 10, which covers most cases and limits the cache. y doesn't go above 6 (see cases).
- if (x > 10 || y > 6) (x, y)
- else {
- val key = (x << 8) + y // this would work for any x < 256
- if (cache contains key) {
- cache(key)
- } else {
- val r = (x, y)
- cache += key -> r
- r
- }
- }
+ val consShift = 3
+ val prodMask = (1 << consShift) - 1
+
+ def cons(i: Int) = i >>> consShift
+ def prod(i: Int) = i & prodMask
+
+ private def t(x: Int, y: Int): Int = (x << consShift) | y
+
+ /**
+ * Returns the number of stack values consumed and produced by `insn`, encoded in a single `Int`
+ * (the `cons` / `prod` extract individual values). The returned values are correct for use in
+ * asm's Analyzer framework. For example, a LLOAD instruction produces one stack value. See also
+ * doc in `analysis` package object.
+ *
+ * This method requires the `frame` to be in the state **before** executing / interpreting the
+ * `insn`.
+ */
+ def forAsmAnalysis[V <: Value](insn: AbstractInsnNode, frame: Frame[V]): Int = computeConsProd(insn, forClassfile = false, conservative = false, frame = frame)
+
+ /**
+ * Returns the maximal possible growth of the stack when executing `insn`. The returned value
+ * is usually the same as expected by asm's Analyzer framework, but it may be larger. For
+ * example, consider a POP2 instruction:
+ * - if two size-1 values are popped, then the asm Analyzer consumes two values
+ * - if a size-2 value is popped, the asm Analyzer consumes only one stack slot (see doc in the
+ * `analysis` package object)
+ *
+ * If a precise result is needed, invoke the `forAsmAnalysis` and provide a `frame` value that
+ * allows looking up the sizes of values on the stack.
+ */
+ def maxStackGrowth(insn: AbstractInsnNode): Int = {
+ val prodCons = computeConsProd(insn, forClassfile = false, conservative = true)
+ prod(prodCons) - cons(prodCons)
}
/**
- * Returns a pair with the number of stack values consumed and produced by `insn`.
- * This method requires the `frame` to be in the state **before** executing / interpreting
- * the `insn`.
+ * Returns the number of stack values consumed and produced by `insn`, encoded in a single `Int`
+ * (the `cons` / `prod` extract individual values). The returned values are correct for writing
+ * into a classfile (see doc on the `analysis` package object).
*/
- def apply[V <: Value](insn: AbstractInsnNode, frame: Frame[V]): (Int, Int) = {
+ def forClassfile(insn: AbstractInsnNode): Int = computeConsProd(insn, forClassfile = true, conservative = false)
+
+ private def invokeConsProd(methodDesc: String, insn: AbstractInsnNode, forClassfile: Boolean): Int = {
+ val consumesReceiver = insn.getOpcode != INVOKESTATIC && insn.getOpcode != INVOKEDYNAMIC
+ if (forClassfile) {
+ val sizes = Type.getArgumentsAndReturnSizes(methodDesc)
+ val cons = (sizes >> 2) - (if (consumesReceiver) 0 else 1)
+ val prod = sizes & 0x03
+ t(cons, prod)
+ } else {
+ val cons = Type.getArgumentTypes(methodDesc).length + (if (consumesReceiver) 1 else 0)
+ val prod = if (Type.getReturnType(methodDesc) == Type.VOID_TYPE) 0 else 1
+ t(cons, prod)
+ }
+ }
+
+ private def fieldInsnIsLongOrDouble(insn: AbstractInsnNode) = {
+ val d = insn.asInstanceOf[FieldInsnNode].desc
+ d == "J" || d == "D"
+ }
+
+ private def computeConsProd[V <: Value](insn: AbstractInsnNode, forClassfile: Boolean, conservative: Boolean, frame: Frame[V] = null): Int = {
+ // not used if `forClassfile || conservative`: in these cases, `frame` is allowed to be `null`
def peekStack(n: Int): V = frame.peekStack(n)
(insn.getOpcode: @switch) match {
@@ -48,142 +87,176 @@ object InstructionStackEffect {
ICONST_3 |
ICONST_4 |
ICONST_5 |
- LCONST_0 |
- LCONST_1 |
FCONST_0 |
FCONST_1 |
FCONST_2 |
- DCONST_0 |
- DCONST_1 |
BIPUSH |
SIPUSH |
- LDC |
ILOAD |
- LLOAD |
FLOAD |
- DLOAD |
ALOAD => t(0, 1)
+ case LDC =>
+ if (forClassfile) insn.asInstanceOf[LdcInsnNode].cst match {
+ case _: java.lang.Long | _: java.lang.Double => t(0, 2)
+ case _ => t(0, 1)
+ } else
+ t(0, 1)
+
+ case LCONST_0 |
+ LCONST_1 |
+ DCONST_0 |
+ DCONST_1 |
+ LLOAD |
+ DLOAD => if (forClassfile) t(0, 2) else t(0, 1)
+
case IALOAD |
- LALOAD |
FALOAD |
- DALOAD |
AALOAD |
BALOAD |
CALOAD |
SALOAD => t(2, 1)
+ case LALOAD |
+ DALOAD => if (forClassfile) t(2, 2) else t(2, 1)
+
case ISTORE |
- LSTORE |
FSTORE |
- DSTORE |
ASTORE => t(1, 0)
+ case LSTORE |
+ DSTORE => if (forClassfile) t(2, 0) else t(1, 0)
+
case IASTORE |
- LASTORE |
FASTORE |
- DASTORE |
AASTORE |
BASTORE |
CASTORE |
SASTORE => t(3, 0)
+ case LASTORE |
+ DASTORE => if (forClassfile) t(4, 0) else t(3, 0)
+
case POP => t(1, 0)
case POP2 =>
- val isSize2 = peekStack(0).getSize == 2
- if (isSize2) t(1, 0) else t(2, 0)
+ if (forClassfile) t(2, 0)
+ else if (conservative) t(1, 0)
+ else {
+ val isSize2 = peekStack(0).getSize == 2
+ if (isSize2) t(1, 0) else t(2, 0)
+ }
case DUP => t(1, 2)
case DUP_X1 => t(2, 3)
case DUP_X2 =>
- val isSize2 = peekStack(1).getSize == 2
- if (isSize2) t(2, 3) else t(3, 4)
+ if (forClassfile || conservative) t(3, 4)
+ else {
+ val isSize2 = peekStack(1).getSize == 2
+ if (isSize2) t(2, 3) else t(3, 4)
+ }
case DUP2 =>
- val isSize2 = peekStack(0).getSize == 2
- if (isSize2) t(1, 2) else t(2, 4)
+ if (forClassfile || conservative) t(2, 4)
+ else {
+ val isSize2 = peekStack(0).getSize == 2
+ if (isSize2) t(1, 2) else t(2, 4)
+ }
case DUP2_X1 =>
- val isSize2 = peekStack(0).getSize == 2
- if (isSize2) t(2, 3) else t(3, 4)
+ if (forClassfile || conservative) t(3, 5)
+ else {
+ val isSize2 = peekStack(0).getSize == 2
+ if (isSize2) t(2, 3) else t(3, 5)
+ }
case DUP2_X2 =>
- val v1isSize2 = peekStack(0).getSize == 2
- if (v1isSize2) {
- val v2isSize2 = peekStack(1).getSize == 2
- if (v2isSize2) t(2, 3) else t(3, 4)
- } else {
- val v3isSize2 = peekStack(2).getSize == 2
- if (v3isSize2) t(3, 5) else t(4, 6)
+ if (forClassfile || conservative) t(4, 6)
+ else {
+ val v1isSize2 = peekStack(0).getSize == 2
+ if (v1isSize2) {
+ val v2isSize2 = peekStack(1).getSize == 2
+ if (v2isSize2) t(2, 3) else t(3, 4)
+ } else {
+ val v3isSize2 = peekStack(2).getSize == 2
+ if (v3isSize2) t(3, 5) else t(4, 6)
+ }
}
case SWAP => t(2, 2)
case IADD |
- LADD |
FADD |
- DADD |
ISUB |
- LSUB |
FSUB |
- DSUB |
IMUL |
- LMUL |
FMUL |
- DMUL |
IDIV |
- LDIV |
FDIV |
- DDIV |
IREM |
+ FREM => t(2, 1)
+
+ case LADD |
+ DADD |
+ LSUB |
+ DSUB |
+ LMUL |
+ DMUL |
+ LDIV |
+ DDIV |
LREM |
- FREM |
- DREM => t(2, 1)
+ DREM => if (forClassfile) t(4, 2) else t(2, 1)
case INEG |
- LNEG |
- FNEG |
- DNEG => t(1, 1)
+ FNEG => t(1, 1)
+
+ case LNEG |
+ DNEG => if (forClassfile) t(2, 2) else t(1, 1)
case ISHL |
- LSHL |
ISHR |
- LSHR |
IUSHR |
- LUSHR |
IAND |
- LAND |
IOR |
+ IXOR => t(2, 1)
+
+ case LSHL |
+ LSHR |
+ LUSHR => if (forClassfile) t(3, 2) else t(2, 1)
+
+ case LAND |
LOR |
- IXOR |
- LXOR => t(2, 1)
+ LXOR => if (forClassfile) t(4, 2) else t(2, 1)
case IINC => t(0, 0)
- case I2L |
- I2F |
- I2D |
- L2I |
- L2F |
- L2D |
+ case I2F |
F2I |
- F2L |
- F2D |
- D2I |
- D2L |
- D2F |
I2B |
I2C |
I2S => t(1, 1)
+ case I2L |
+ I2D |
+ F2L |
+ F2D => if (forClassfile) t(1, 2) else t(1, 1)
+
+ case L2I |
+ L2F |
+ D2I |
+ D2F => if (forClassfile) t(2, 1) else t(1, 1)
+
+ case L2D |
+ D2L => if (forClassfile) t(2, 2) else t(1, 1)
+
+ case FCMPL |
+ FCMPG => t(2, 1)
+
case LCMP |
- FCMPL |
- FCMPG |
DCMPL |
- DCMPG => t(2, 1)
+ DCMPG => if (forClassfile) t(4, 1) else t(2, 1)
case IFEQ |
IFNE |
@@ -211,35 +284,36 @@ object InstructionStackEffect {
LOOKUPSWITCH => t(1, 0)
case IRETURN |
- LRETURN |
FRETURN |
- DRETURN |
ARETURN => t(1, 0) // Frame.execute consumes one stack value
+ case LRETURN |
+ DRETURN => if (forClassfile) t(2, 0) else t(1, 0)
+
case RETURN => t(0, 0) // Frame.execute does not change the stack
- case GETSTATIC => t(0, 1)
+ case GETSTATIC =>
+ val prod = if (forClassfile && fieldInsnIsLongOrDouble(insn)) 2 else 1
+ t(0, prod)
- case PUTSTATIC => t(1, 0)
+ case PUTSTATIC =>
+ val cons = if (forClassfile && fieldInsnIsLongOrDouble(insn)) 2 else 1
+ t(cons, 0)
- case GETFIELD => t(1, 1)
+ case GETFIELD =>
+ val prod = if (forClassfile && fieldInsnIsLongOrDouble(insn)) 2 else 1
+ t(1, prod)
- case PUTFIELD => t(2, 0)
+ case PUTFIELD =>
+ val cons = if (forClassfile && fieldInsnIsLongOrDouble(insn)) 3 else 2
+ t(cons, 0)
case INVOKEVIRTUAL |
INVOKESPECIAL |
INVOKESTATIC |
- INVOKEINTERFACE =>
- val desc = insn.asInstanceOf[MethodInsnNode].desc
- val cons = Type.getArgumentTypes(desc).length + (if (insn.getOpcode == INVOKESTATIC) 0 else 1)
- val prod = if (Type.getReturnType(desc) == Type.VOID_TYPE) 0 else 1
- t(cons, prod)
-
- case INVOKEDYNAMIC =>
- val desc = insn.asInstanceOf[InvokeDynamicInsnNode].desc
- val cons = Type.getArgumentTypes(desc).length
- val prod = if (Type.getReturnType(desc) == Type.VOID_TYPE) 0 else 1
- t(cons, prod)
+ INVOKEINTERFACE => invokeConsProd(insn.asInstanceOf[MethodInsnNode].desc, insn, forClassfile)
+
+ case INVOKEDYNAMIC => invokeConsProd(insn.asInstanceOf[InvokeDynamicInsnNode].desc, insn, forClassfile)
case NEW => t(0, 1)
@@ -261,5 +335,4 @@ object InstructionStackEffect {
IFNONNULL => t(1, 0)
}
}
-
}
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzer.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzer.scala
index 31b62f747e..01afd0d2ef 100644
--- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzer.scala
+++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzer.scala
@@ -5,68 +5,14 @@ package analysis
import java.util
import scala.annotation.switch
-import scala.tools.asm.{Type, Opcodes}
-import scala.tools.asm.tree.{MethodInsnNode, LdcInsnNode, AbstractInsnNode}
-import scala.tools.asm.tree.analysis.{Frame, Analyzer, Interpreter, Value}
+import scala.tools.asm.{Opcodes, Type}
+import scala.tools.asm.tree.{AbstractInsnNode, LdcInsnNode, MethodInsnNode, MethodNode}
+import scala.tools.asm.tree.analysis._
import scala.tools.nsc.backend.jvm.opt.BytecodeUtils
import BytecodeUtils._
/**
- * Some notes on the ASM analyzer framework.
- *
- * Value
- * - Abstract, needs to be implemented for each analysis.
- * - Represents the desired information about local variables and stack values, for example:
- * - Is this value known to be null / not null?
- * - What are the instructions that could potentially have produced this value?
- *
- * Interpreter
- * - Abstract, needs to be implemented for each analysis. Sometimes one can subclass an existing
- * interpreter, e.g., SourceInterpreter or BasicInterpreter.
- * - Multiple abstract methods that receive an instruction and the instruction's input values, and
- * return a value representing the result of that instruction.
- * - Note: due to control flow, the interpreter can be invoked multiple times for the same
- * instruction, until reaching a fixed point.
- * - Abstract `merge` function that computes the least upper bound of two values. Used by
- * Frame.merge (see below).
- *
- * Frame
- * - Can be used directly for many analyses, no subclass required.
- * - Every frame has an array of values: one for each local variable and for each stack slot.
- * - A `top` index stores the index of the current stack top
- * - NOTE: for a size-2 local variable at index i, the local variable at i+1 is set to an empty
- * value. However, for a size-2 value at index i on the stack, the value at i+1 holds the next
- * stack value.
- * - Defines the `execute(instruction)` method.
- * - executing mutates the state of the frame according to the effect of the instruction
- * - pop consumed values from the stack
- * - pass them to the interpreter together with the instruction
- * - if applicable, push the resulting value on the stack
- * - Defines the `merge(otherFrame)` method
- * - called by the analyzer when multiple control flow paths lead to an instruction
- * - the frame at the branching instruction is merged into the current frame of the
- * instruction (held by the analyzer)
- * - mutates the values of the current frame, merges all values using interpreter.merge.
- *
- * Analyzer
- * - Stores a frame for each instruction
- * - `merge` function takes an instruction and a frame, merges the existing frame for that instr
- * (from the frames array) with the new frame passed as argument.
- * if the frame changed, puts the instruction on the work queue (fixpiont).
- * - initial frame: initialized for first instr by calling interpreter.new[...]Value
- * for each slot (locals and params), stored in frames[firstInstr] by calling `merge`
- * - work queue of instructions (`queue` array, `top` index for next instruction to analyze)
- * - analyze(method): simulate control flow. while work queue non-empty:
- * - copy the state of `frames[instr]` into a local frame `current`
- * - call `current.execute(instr, interpreter)`, mutating the `current` frame
- * - if it's a branching instruction
- * - for all potential destination instructions
- * - merge the destination instruction frame with the `current` frame
- * (this enqueues the destination instr if its frame changed)
- * - invoke `newControlFlowEdge` (see below)
- * - the analyzer also tracks active exception handlers at each instruction
- * - the empty method `newControlFlowEdge` can be overridden to track control flow if required
- *
+ * See the package object `analysis` for details on the ASM analysis framework.
*
* Some notes on nullness analysis.
*
@@ -87,59 +33,37 @@ import BytecodeUtils._
*/
/**
- * Type to represent nullness of values.
- */
-sealed trait Nullness {
- final def merge(other: Nullness) = if (this == other) this else Unknown
-}
-case object NotNull extends Nullness
-case object Unknown extends Nullness
-case object Null extends Nullness
-
-/**
* Represents the nullness state for a local variable or stack value.
*
- * Note that nullness of primitive values is not tracked, it will be always [[Unknown]].
+ * Note that nullness of primitive values is not tracked, it will be always unknown.
*/
-sealed trait NullnessValue extends Value {
- /**
- * The nullness of this value.
- */
- def nullness: Nullness
-
- /**
- * True if this value is a long or double. The Analyzer framework needs to know
- * the size of each value when interpreting instructions, see `Frame.execute`.
- */
- def isSize2: Boolean
+sealed abstract class NullnessValue(final val isSize2: Boolean) extends Value {
/**
* The size of the slot described by this value. Cannot be 0 because no values are allocated
* for void-typed slots, see NullnessInterpreter.newValue.
**/
def getSize: Int = if (isSize2) 2 else 1
- def merge(other: NullnessValue) = NullnessValue(nullness merge other.nullness, isSize2)
+ def merge(other: NullnessValue) = {
+ if (this eq other) this
+ else if (this eq UnknownValue2) this // the only possible value of size two
+ else UnknownValue1
+ }
+
+ final override def equals(other: Any) = this eq other.asInstanceOf[Object]
}
-object NullValue extends NullnessValue { def nullness = Null; def isSize2 = false; override def toString = "Null" }
-object UnknownValue1 extends NullnessValue { def nullness = Unknown; def isSize2 = false; override def toString = "Unknown1" }
-object UnknownValue2 extends NullnessValue { def nullness = Unknown; def isSize2 = true; override def toString = "Unknown2" }
-object NotNullValue extends NullnessValue { def nullness = NotNull; def isSize2 = false; override def toString = "NotNull" }
+object NullValue extends NullnessValue(isSize2 = false) { override def toString = "Null" }
+object UnknownValue1 extends NullnessValue(isSize2 = false) { override def toString = "Unknown1" }
+object UnknownValue2 extends NullnessValue(isSize2 = true ) { override def toString = "Unknown2" }
+object NotNullValue extends NullnessValue(isSize2 = false) { override def toString = "NotNull" }
object NullnessValue {
- def apply(nullness: Nullness, isSize2: Boolean): NullnessValue = {
- if (nullness == Null) NullValue
- else if (nullness == NotNull) NotNullValue
- else if (isSize2) UnknownValue2
- else UnknownValue1
- }
-
- def apply(nullness: Nullness, insn: AbstractInsnNode): NullnessValue = {
- apply(nullness, isSize2 = BytecodeUtils.instructionResultSize(insn) == 2)
- }
+ def unknown(isSize2: Boolean) = if (isSize2) UnknownValue2 else UnknownValue1
+ def unknown(insn: AbstractInsnNode) = if (BytecodeUtils.instructionResultSize(insn) == 2) UnknownValue2 else UnknownValue1
}
-final class NullnessInterpreter extends Interpreter[NullnessValue](Opcodes.ASM5) {
+final class NullnessInterpreter(bTypes: BTypes, method: MethodNode) extends Interpreter[NullnessValue](Opcodes.ASM5) {
def newValue(tp: Type): NullnessValue = {
// ASM loves giving semantics to null. The behavior here is the same as in SourceInterpreter,
// which is provided by the framework.
@@ -151,29 +75,31 @@ final class NullnessInterpreter extends Interpreter[NullnessValue](Opcodes.ASM5)
// (2) `tp` may also be `null`. When creating the initial frame, the analyzer invokes
// `newValue(null)` for each local variable. We have to return a value of size 1.
if (tp == Type.VOID_TYPE) null // (1)
- else NullnessValue(Unknown, isSize2 = tp != null /*(2)*/ && tp.getSize == 2 )
+ else NullnessValue.unknown(isSize2 = tp != null /*(2)*/ && tp.getSize == 2 )
}
override def newParameterValue(isInstanceMethod: Boolean, local: Int, tp: Type): NullnessValue = {
// For instance methods, the `this` parameter is known to be not null.
- if (isInstanceMethod && local == 0) NullnessValue(NotNull, isSize2 = false)
+ val isThis = local == 0 && (isInstanceMethod || {
+ method.parameters != null && !method.parameters.isEmpty && {
+ val p = method.parameters.get(0)
+ (p.access & Opcodes.ACC_SYNTHETIC) != 0 && p.name == "$this"
+ }
+ })
+ if (isThis) NotNullValue
else super.newParameterValue(isInstanceMethod, local, tp)
}
- def newOperation(insn: AbstractInsnNode): NullnessValue = {
- val nullness = (insn.getOpcode: @switch) match {
- case Opcodes.ACONST_NULL => Null
+ def newOperation(insn: AbstractInsnNode): NullnessValue = (insn.getOpcode: @switch) match {
+ case Opcodes.ACONST_NULL => NullValue
- case Opcodes.LDC => insn.asInstanceOf[LdcInsnNode].cst match {
- case _: String | _: Type => NotNull
- case _ => Unknown
- }
-
- case _ => Unknown
+ case Opcodes.LDC => insn.asInstanceOf[LdcInsnNode].cst match {
+ case _: String | _: Type => NotNullValue
+ case _ => NullnessValue.unknown(insn)
}
// for Opcodes.NEW, we use Unknown. The value will become NotNull after the constructor call.
- NullnessValue(nullness, insn)
+ case _ => NullnessValue.unknown(insn)
}
def copyOperation(insn: AbstractInsnNode, value: NullnessValue): NullnessValue = value
@@ -182,26 +108,24 @@ final class NullnessInterpreter extends Interpreter[NullnessValue](Opcodes.ASM5)
case Opcodes.CHECKCAST => value
case Opcodes.NEWARRAY |
- Opcodes.ANEWARRAY => NullnessValue(NotNull, isSize2 = false)
+ Opcodes.ANEWARRAY => NotNullValue
- case _ => NullnessValue(Unknown, insn)
+ case _ => NullnessValue.unknown(insn)
}
def binaryOperation(insn: AbstractInsnNode, value1: NullnessValue, value2: NullnessValue): NullnessValue = {
- NullnessValue(Unknown, insn)
+ NullnessValue.unknown(insn)
}
- def ternaryOperation(insn: AbstractInsnNode, value1: NullnessValue, value2: NullnessValue, value3: NullnessValue): NullnessValue = {
- NullnessValue(Unknown, isSize2 = false)
- }
+ def ternaryOperation(insn: AbstractInsnNode, value1: NullnessValue, value2: NullnessValue, value3: NullnessValue): NullnessValue = UnknownValue1
- def naryOperation(insn: AbstractInsnNode, values: util.List[_ <: NullnessValue]): NullnessValue = (insn.getOpcode: @switch) match {
- case Opcodes.MULTIANEWARRAY =>
- NullnessValue(NotNull, isSize2 = false)
+ def naryOperation(insn: AbstractInsnNode, values: util.List[_ <: NullnessValue]): NullnessValue = insn match {
+ case mi: MethodInsnNode if bTypes.backendUtils.isNonNullMethodInvocation(mi) =>
+ NotNullValue
case _ =>
- // TODO: use a list of methods that are known to return non-null values
- NullnessValue(Unknown, insn)
+ if (insn.getOpcode == Opcodes.MULTIANEWARRAY) NotNullValue
+ else NullnessValue.unknown(insn)
}
def returnOperation(insn: AbstractInsnNode, value: NullnessValue, expected: NullnessValue): Unit = ()
@@ -219,8 +143,10 @@ class NullnessFrame(nLocals: Int, nStack: Int) extends AliasingFrame[NullnessVal
override def execute(insn: AbstractInsnNode, interpreter: Interpreter[NullnessValue]): Unit = {
import Opcodes._
- // get the object id of the object that is known to be not-null after this operation
- val nullCheckedAliasId: Long = (insn.getOpcode: @switch) match {
+ // get the alias set the object that is known to be not-null after this operation.
+ // alias sets are mutable / mutated, so after super.execute, this set contains the remaining
+ // aliases of the value that becomes not-null.
+ val nullCheckedAliases: AliasSet = (insn.getOpcode: @switch) match {
case IALOAD |
LALOAD |
FALOAD |
@@ -229,7 +155,7 @@ class NullnessFrame(nLocals: Int, nStack: Int) extends AliasingFrame[NullnessVal
BALOAD |
CALOAD |
SALOAD =>
- aliasId(this.stackTop - 1)
+ aliasesOf(this.stackTop - 1)
case IASTORE |
FASTORE |
@@ -239,35 +165,36 @@ class NullnessFrame(nLocals: Int, nStack: Int) extends AliasingFrame[NullnessVal
SASTORE |
LASTORE |
DASTORE =>
- aliasId(this.stackTop - 2)
+ aliasesOf(this.stackTop - 2)
case GETFIELD =>
- aliasId(this.stackTop)
+ aliasesOf(this.stackTop)
case PUTFIELD =>
- aliasId(this.stackTop - 1)
+ aliasesOf(this.stackTop - 1)
case INVOKEVIRTUAL |
INVOKESPECIAL |
INVOKEINTERFACE =>
val desc = insn.asInstanceOf[MethodInsnNode].desc
val numArgs = Type.getArgumentTypes(desc).length
- aliasId(this.stackTop - numArgs)
+ aliasesOf(this.stackTop - numArgs)
case ARRAYLENGTH |
MONITORENTER |
MONITOREXIT =>
- aliasId(this.stackTop)
+ aliasesOf(this.stackTop)
case _ =>
- -1
+ null
}
super.execute(insn, interpreter)
- if (nullCheckedAliasId != -1) {
- for (i <- valuesWithAliasId(nullCheckedAliasId))
- this.setValue(i, NotNullValue)
+ if (nullCheckedAliases != null) {
+ val it = nullCheckedAliases.iterator
+ while (it.hasNext)
+ this.setValue(it.next(), NotNullValue)
}
}
}
@@ -276,7 +203,7 @@ class NullnessFrame(nLocals: Int, nStack: Int) extends AliasingFrame[NullnessVal
* This class is required to override the `newFrame` methods, which makes makes sure the analyzer
* uses NullnessFrames.
*/
-class NullnessAnalyzer extends Analyzer[NullnessValue](new NullnessInterpreter) {
+class NullnessAnalyzer(bTypes: BTypes, method: MethodNode) extends Analyzer[NullnessValue](new NullnessInterpreter(bTypes, method)) {
override def newFrame(nLocals: Int, nStack: Int): NullnessFrame = new NullnessFrame(nLocals, nStack)
override def newFrame(src: Frame[_ <: NullnessValue]): NullnessFrame = new NullnessFrame(src)
}
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzer.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzerImpl.scala
index 594fd8923c..8af4bd4d5d 100644
--- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzer.scala
+++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzerImpl.scala
@@ -15,11 +15,10 @@ import scala.tools.asm.{Type, MethodVisitor}
import scala.tools.asm.Opcodes._
import scala.tools.asm.tree._
import scala.tools.asm.tree.analysis._
-import scala.tools.nsc.backend.jvm.BTypes.InternalName
import opt.BytecodeUtils._
-import scala.collection.convert.decorateAsScala._
+import scala.collection.JavaConverters._
/**
* This class provides additional queries over ASM's built-in `SourceValue` analysis.
@@ -55,24 +54,16 @@ import scala.collection.convert.decorateAsScala._
*
* If ever needed, we could introduce a mode where primitive conversions (l2i) are considered as
* copying operations.
+ *
+ * Note on performance: thee data flow analysis (SourceValue / SourceInterpreter, provided by ASM)
+ * is roughly 2-3x slower than a simple analysis (like BasicValue). The reason is that the merge
+ * function (merging producer sets) is more complex than merging simple basic values.
+ * See also the doc comment in the package object `analysis`.
*/
-class ProdConsAnalyzer(methodNode: MethodNode, classInternalName: InternalName) {
-
- /* Timers for benchmarking ProdCons
- import scala.reflect.internal.util.Statistics._
- import ProdConsAnalyzer._
- val analyzerTimer = newSubTimer(classInternalName + "#" + methodNode.name + " - analysis", prodConsAnalyzerTimer)
- val consumersTimer = newSubTimer(classInternalName + "#" + methodNode.name + " - consumers", prodConsAnalyzerTimer)
- */
-
- val analyzer = new Analyzer(new InitialProducerSourceInterpreter)
+trait ProdConsAnalyzerImpl {
+ val methodNode: MethodNode
-// val start = analyzerTimer.start()
- analyzer.analyze(classInternalName, methodNode)
-// analyzerTimer.stop(start)
-// println(analyzerTimer.line)
-
- def frameAt(insn: AbstractInsnNode) = analyzer.frameAt(insn, methodNode)
+ def frameAt(insn: AbstractInsnNode): Frame[SourceValue]
/**
* Returns the potential producer instructions of a (local or stack) value in the frame of `insn`.
@@ -102,8 +93,13 @@ class ProdConsAnalyzer(methodNode: MethodNode, classInternalName: InternalName)
inputValues(insn).iterator.flatMap(v => v.insns.asScala).toSet
}
- def consumersOfOutputsFrom(insn: AbstractInsnNode): Set[AbstractInsnNode] =
- _consumersOfOutputsFrom.get(insn).map(v => v.indices.flatMap(v.apply)(collection.breakOut): Set[AbstractInsnNode]).getOrElse(Set.empty)
+ def consumersOfOutputsFrom(insn: AbstractInsnNode): Set[AbstractInsnNode] = insn match {
+ case _: UninitializedLocalProducer => Set.empty
+ case ParameterProducer(local) => consumersOfValueAt(methodNode.instructions.getFirst, local)
+ case ExceptionProducer(handlerLabel, handlerFrame) => consumersOfValueAt(handlerLabel, handlerFrame.stackTop)
+ case _ =>
+ _consumersOfOutputsFrom.get(insn).map(v => v.indices.flatMap(v.apply)(collection.breakOut): Set[AbstractInsnNode]).getOrElse(Set.empty)
+ }
/**
* Returns the potential initial producer instructions of a value in the frame of `insn`.
@@ -159,13 +155,19 @@ class ProdConsAnalyzer(methodNode: MethodNode, classInternalName: InternalName)
inputValueSlots(insn).flatMap(slot => initialProducersForValueAt(insn, slot)).toSet
}
- def ultimateConsumersOfOutputsFrom(insn: AbstractInsnNode): Set[AbstractInsnNode] = {
- lazy val next = insn.getNext
- outputValueSlots(insn).flatMap(slot => ultimateConsumersOfValueAt(next, slot)).toSet
+ def ultimateConsumersOfOutputsFrom(insn: AbstractInsnNode): Set[AbstractInsnNode] = insn match {
+ case _: UninitializedLocalProducer => Set.empty
+ case _ =>
+ lazy val next = insn match {
+ case _: ParameterProducer => methodNode.instructions.getFirst
+ case ExceptionProducer(handlerLabel, _) => handlerLabel
+ case _ => insn.getNext
+ }
+ outputValueSlots(insn).flatMap(slot => ultimateConsumersOfValueAt(next, slot)).toSet
}
private def isCopyOperation(insn: AbstractInsnNode): Boolean = {
- isVarInstruction(insn) || {
+ isLoadOrStore(insn) || {
(insn.getOpcode: @switch) match {
case DUP | DUP_X1 | DUP_X2 | DUP2 | DUP2_X1 | DUP2_X2 | SWAP | CHECKCAST => true
case _ => false
@@ -376,9 +378,9 @@ class ProdConsAnalyzer(methodNode: MethodNode, classInternalName: InternalName)
Seq(insn.asInstanceOf[IincInsnNode].`var`)
} else {
val frame = frameAt(insn)
- val stackEffect = InstructionStackEffect(insn, frame)
+ val prodCons = InstructionStackEffect.forAsmAnalysis(insn, frame)
val stackSize = frame.getLocals + frame.getStackSize
- (stackSize - stackEffect._1) until stackSize
+ (stackSize - InstructionStackEffect.cons(prodCons)) until stackSize
}
}
@@ -386,7 +388,7 @@ class ProdConsAnalyzer(methodNode: MethodNode, classInternalName: InternalName)
private def outputValueSlots(insn: AbstractInsnNode): Seq[Int] = insn match {
case ParameterProducer(local) => Seq(local)
case UninitializedLocalProducer(local) => Seq(local)
- case ExceptionProducer(frame) => Seq(frame.stackTop)
+ case ExceptionProducer(_, frame) => Seq(frame.stackTop)
case _ =>
if (insn.getOpcode == -1) return Seq.empty
if (isStore(insn)) {
@@ -395,16 +397,15 @@ class ProdConsAnalyzer(methodNode: MethodNode, classInternalName: InternalName)
Seq(insn.asInstanceOf[IincInsnNode].`var`)
} else {
val frame = frameAt(insn)
- val stackEffect = InstructionStackEffect(insn, frame)
+ val prodCons = InstructionStackEffect.forAsmAnalysis(insn, frame)
val nextFrame = frameAt(insn.getNext)
val stackSize = nextFrame.getLocals + nextFrame.getStackSize
- (stackSize - stackEffect._2) until stackSize
+ (stackSize - InstructionStackEffect.prod(prodCons)) until stackSize
}
}
/** For each instruction, a set of potential consumers of the produced values. */
private lazy val _consumersOfOutputsFrom: Map[AbstractInsnNode, Vector[Set[AbstractInsnNode]]] = {
-// val start = consumersTimer.start()
var res = Map.empty[AbstractInsnNode, Vector[Set[AbstractInsnNode]]]
for {
insn <- methodNode.instructions.iterator.asScala
@@ -417,8 +418,6 @@ class ProdConsAnalyzer(methodNode: MethodNode, classInternalName: InternalName)
val outputIndex = producedSlots.indexOf(i)
res = res.updated(producer, currentConsumers.updated(outputIndex, currentConsumers(outputIndex) + insn))
}
-// consumersTimer.stop(start)
-// println(consumersTimer.line)
res
}
@@ -426,11 +425,6 @@ class ProdConsAnalyzer(methodNode: MethodNode, classInternalName: InternalName)
private val _ultimateConsumersCache: mutable.AnyRefMap[(AbstractInsnNode, Int), Set[AbstractInsnNode]] = mutable.AnyRefMap.empty
}
-object ProdConsAnalyzer {
- import scala.reflect.internal.util.Statistics._
- val prodConsAnalyzerTimer = newTimer("Time in ProdConsAnalyzer", "jvm")
-}
-
/**
* A class for pseudo-instructions representing the initial producers of local values that have
* no producer instruction in the method:
@@ -446,10 +440,10 @@ object ProdConsAnalyzer {
* return a;
* }
*
- * In the first frame of the method, the SoruceValue for parameter `a` gives an empty set of
+ * In the first frame of the method, the SourceValue for parameter `a` gives an empty set of
* producer instructions.
*
- * In the frame of the `IRETURN` instruction, the SoruceValue for parameter `a` lists a single
+ * In the frame of the `IRETURN` instruction, the SourceValue for parameter `a` lists a single
* producer instruction: the `ISTORE 1`. This makes it look as if there was a single producer for
* `a`, where in fact it might still hold the parameter's initial value.
*/
@@ -459,9 +453,9 @@ abstract class InitialProducer extends AbstractInsnNode(-1) {
override def accept(cv: MethodVisitor): Unit = throw new UnsupportedOperationException
}
-case class ParameterProducer(local: Int) extends InitialProducer
-case class UninitializedLocalProducer(local: Int) extends InitialProducer
-case class ExceptionProducer(handlerFrame: Frame[_ <: Value]) extends InitialProducer
+case class ParameterProducer(local: Int) extends InitialProducer
+case class UninitializedLocalProducer(local: Int) extends InitialProducer
+case class ExceptionProducer[V <: Value](handlerLabel: LabelNode, handlerFrame: Frame[V]) extends InitialProducer
class InitialProducerSourceInterpreter extends SourceInterpreter {
override def newParameterValue(isInstanceMethod: Boolean, local: Int, tp: Type): SourceValue = {
@@ -473,6 +467,6 @@ class InitialProducerSourceInterpreter extends SourceInterpreter {
}
override def newExceptionValue(tryCatchBlockNode: TryCatchBlockNode, handlerFrame: Frame[_ <: Value], exceptionType: Type): SourceValue = {
- new SourceValue(1, ExceptionProducer(handlerFrame))
+ new SourceValue(1, ExceptionProducer(tryCatchBlockNode.handler, handlerFrame))
}
}
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/TypeFlowInterpreter.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/TypeFlowInterpreter.scala
new file mode 100644
index 0000000000..bcf9978c16
--- /dev/null
+++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/TypeFlowInterpreter.scala
@@ -0,0 +1,36 @@
+package scala.tools.nsc
+package backend.jvm
+package analysis
+
+import scala.tools.asm.Type
+import scala.tools.asm.tree.analysis.{BasicValue, BasicInterpreter}
+
+abstract class TypeFlowInterpreter extends BasicInterpreter {
+ override def newValue(tp: Type) = {
+ if (tp == null) super.newValue(tp)
+ else if (isRef(tp)) new BasicValue(tp)
+ else super.newValue(tp)
+ }
+
+ def isRef(tp: Type) = tp != null && (tp.getSort match {
+ case Type.OBJECT | Type.ARRAY => true
+ case _ => false
+ })
+
+ def refLub(a: BasicValue, b: BasicValue): BasicValue
+
+ override def merge(a: BasicValue, b: BasicValue): BasicValue = {
+ if (a == b) a
+ else if (isRef(a.getType) && isRef(b.getType)) refLub(a, b)
+ else BasicValue.UNINITIALIZED_VALUE
+ }
+}
+
+/**
+ * A [[TypeFlowInterpreter]] which collapses LUBs of non-equal reference types to Object.
+ * This could be made more precise by looking up ClassBTypes for the two reference types and using
+ * the `jvmWiseLUB` method.
+ */
+class NonLubbingTypeFlowInterpreter extends TypeFlowInterpreter {
+ def refLub(a: BasicValue, b: BasicValue): BasicValue = BasicValue.REFERENCE_VALUE // java/lang/Object
+}
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/package.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/package.scala
new file mode 100644
index 0000000000..999c686aac
--- /dev/null
+++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/package.scala
@@ -0,0 +1,374 @@
+package scala.tools.nsc.backend.jvm
+
+/**
+ * Summary on the ASM analyzer framework
+ * --------------------------------------
+ *
+ * Value
+ * - Abstract, needs to be implemented for each analysis.
+ * - Represents the desired information about local variables and stack values, for example:
+ * - Is this value known to be null / not null?
+ * - What are the instructions that could potentially have produced this value?
+ *
+ * Interpreter
+ * - Abstract, needs to be implemented for each analysis. Sometimes one can subclass an existing
+ * interpreter, e.g., SourceInterpreter or BasicInterpreter.
+ * - Multiple abstract methods that receive an instruction and the instruction's input values, and
+ * return a value representing the result of that instruction.
+ * - Note: due to control flow, the interpreter can be invoked multiple times for the same
+ * instruction, until reaching a fixed point.
+ * - Abstract `merge` function that computes the least upper bound of two values. Used by
+ * Frame.merge (see below).
+ *
+ * Frame
+ * - Can be used directly for many analyses, no subclass required.
+ * - Every frame has an array of values: one for each local variable and for each stack slot.
+ * - A `top` index stores the index of the current stack top
+ * - NOTE: for a size-2 local variable at index i, the local variable at i+1 is set to an empty
+ * value. However, for a size-2 value at index i on the stack, the value at i+1 holds the next
+ * stack value. IMPORTANT: this is only the case in ASM's analysis framework, not in bytecode.
+ * See comment below.
+ * - Defines the `execute(instruction)` method.
+ * - executing mutates the state of the frame according to the effect of the instruction
+ * - pop consumed values from the stack
+ * - pass them to the interpreter together with the instruction
+ * - if applicable, push the resulting value on the stack
+ * - Defines the `merge(otherFrame)` method
+ * - called by the analyzer when multiple control flow paths lead to an instruction
+ * - the frame at the branching instruction is merged into the current frame of the
+ * instruction (held by the analyzer)
+ * - mutates the values of the current frame, merges all values using interpreter.merge.
+ *
+ * Analyzer
+ * - Stores a frame for each instruction
+ * - `merge` function takes an instruction and a frame, merges the existing frame for that instr
+ * (from the frames array) with the new frame passed as argument.
+ * if the frame changed, puts the instruction on the work queue (fixpoint).
+ * - initial frame: initialized for first instr by calling interpreter.new[...]Value
+ * for each slot (locals and params), stored in frames[firstInstr] by calling `merge`
+ * - work queue of instructions (`queue` array, `top` index for next instruction to analyze)
+ * - analyze(method): simulate control flow. while work queue non-empty:
+ * - copy the state of `frames[instr]` into a local frame `current`
+ * - call `current.execute(instr, interpreter)`, mutating the `current` frame
+ * - if it's a branching instruction
+ * - for all potential destination instructions
+ * - merge the destination instruction frame with the `current` frame
+ * (this enqueues the destination instr if its frame changed)
+ * - invoke `newControlFlowEdge` (see below)
+ * - the analyzer also tracks active exception handlers at each instruction
+ * - the empty method `newControlFlowEdge` can be overridden to track control flow if required
+ *
+ *
+ * MaxLocals and MaxStack
+ * ----------------------
+ *
+ * At the JVM level, long and double values occupy two slots, both as local variables and on the
+ * stack, as specified in the JVM spec 2.6.2:
+ * "At any point in time, an operand stack has an associated depth, where a value of type long or
+ * double contributes two units to the depth and a value of any other type contributes one unit."
+ *
+ * For example, a method
+ * class A { def f(a: Long, b: Long) = a + b }
+ * has MAXSTACK=4 in the classfile. This value is computed by the ClassWriter / MethodWriter when
+ * generating the classfile (we always pass COMPUTE_MAXS to the ClassWriter).
+ *
+ * For running an ASM Analyzer, long and double values occupy two local variable slots, but only
+ * a single slot on the call stack, as shown by the following snippet:
+ *
+ * import scala.tools.nsc.backend.jvm._
+ * import scala.tools.nsc.backend.jvm.opt.BytecodeUtils._
+ * import scala.collection.convert.decorateAsScala._
+ * import scala.tools.asm.tree.analysis._
+ *
+ * val cn = AsmUtils.readClass("/Users/luc/scala/scala/sandbox/A.class")
+ * val m = cn.methods.iterator.asScala.find(_.name == "f").head
+ *
+ * // the value is read from the classfile, so it's 4
+ * println(s"maxLocals: ${m.maxLocals}, maxStack: ${m.maxStack}") // maxLocals: 5, maxStack: 4
+ *
+ * // we can safely set it to 2 for running the analyzer.
+ * m.maxStack = 2
+ *
+ * val a = new Analyzer(new BasicInterpreter)
+ * a.analyze(cn.name, m)
+ * val addInsn = m.instructions.iterator.asScala.find(_.getOpcode == 97).get // LADD Opcode
+ * val addFrame = a.frameAt(addInsn, m)
+ *
+ * addFrame.getStackSize // 2: the two long values only take one slot each
+ * addFrame.getLocals // 5: this takes one slot, the two long parameters take 2 slots each
+ *
+ *
+ * While running the optimizer, we need to make sure that the `maxStack` value of a method is
+ * large enough for running an ASM analyzer. We don't need to worry if the value is incorrect in
+ * the JVM perspective: the value will be re-computed and overwritten in the ClassWriter.
+ *
+ *
+ * Lessons learnt while benchmarking the alias tracking analysis
+ * -------------------------------------------------------------
+ *
+ * Profiling
+ * - Use YourKit for finding hotspots (cpu profiling). when it comes to drilling down into the details
+ * of a hotspot, don't pay too much attention to the percentages / time counts.
+ * - Should also try other profilers.
+ * - Use timers. When a method showed up as a hotspot, I added a timer around that method, and a
+ * second one within the method to measure specific parts. The timers slow things down, but the
+ * relative numbers show what parts of a method are slow.
+ *
+ * ASM analyzer insights
+ * - The time for running an analysis depends on the number of locals and the number of instructions.
+ * Reducing the number of locals helps speeding up the analysis: there are less values to
+ * merge when merging to frames.
+ * See also https://github.com/scala/scala-dev/issues/47
+ * - The common hot spot of an ASM analysis is Frame.merge, for example in producers / consumers.
+ * - For nullness analysis the time is spent as follows
+ * - 20% merging nullness values. this is as expected: for example, the same absolute amount of
+ * time is spent in merging BasicValues when running a BasicInterpreter.
+ * - 50% merging alias sets. i tried to optimize what i could out of this.
+ * - 20% is spent creating new frames from existing ones, see comment on AliasingFrame.init.
+ * - The implementation of Frame.merge (the main hot spot) contains a megamorphic callsite to
+ * `interpreter.merge`. This can be observed easily by running a test program that either runs
+ * a BasicValue analysis only, versus a program that first runs a nullness analysis and then
+ * a BasicValue. In an example, the time for the BasicValue analysis goes from 519ms to 1963ms,
+ * a 3.8x slowdown.
+ * - I added counters to the Frame.merge methods for nullness and BasicValue analysis. In the
+ * examples I benchmarked, the number of merge invocations was always exactly the same.
+ * It would probably be possible to come up with an example where alias set merging forces
+ * additional analysis rounds until reaching the fixpoint, but I did not observe such cases.
+ *
+ * To benchmark an analysis, instead of benchmarking analysis while it runs in the compiler
+ * backend, one can easily run it from a separate program (or the repl). The bytecode to analyze
+ * can simply be parsed from a classfile. See example at the end of this comment.
+ *
+ *
+ * Nullness Analysis in Miguel's Optimizer
+ * ---------------------------------------
+ *
+ * Miguel implemented alias tracking for nullness analysis differently [1]. Remember that every
+ * frame has an array of values. Miguel's idea was to represent aliasing using reference equality
+ * in the values array: if two entries in the array point to the same value object, the two entries
+ * are aliases in the frame of the given instruction.
+ *
+ * While this idea seems elegant at first sight, Miguel's implementation does not merge frames
+ * correctly when it comes to aliasing. Assume in frame 1, values (a, b, c) are aliases, while in
+ * frame 2 (a, b) are aliases. When merging the second into the first, we have to make sure that
+ * c is removed as an alias of (a, b).
+ *
+ * It would be possible to implement correct alias set merging in Miguel's approach. However, frame
+ * merging is the main hot spot of analysis. The computational complexity of implementing alias set
+ * merging by traversing the values array and comparing references is too high. The concrete
+ * alias set representation that is used in the current implementation (see class AliasingFrame)
+ * makes alias set merging more efficient.
+ *
+ * [1] https://github.com/scala-opt/scala/blob/opt/rebase/src/compiler/scala/tools/nsc/backend/bcode/NullnessPropagator.java
+ *
+ *
+ * Complexity and scaling of analysis
+ * ----------------------------------
+ *
+ * The time complexity of a data flow analysis depends on:
+ *
+ * - The size of the method. The complexity factor is linear (assuming the number of locals and
+ * branching instructions remains constant). The main analysis loop runs through all
+ * instructions of a method once. Instructions are only re-enqueued if a control flow merge
+ * changes the frame at some instruction.
+ *
+ * - The branching instructions. When a second (third, ..) control flow edge arrives at an
+ * instruction, the existing frame at the instruction is merged with the one computed on the
+ * new branch. If the merge function changes the existing frame, the instruction is enqueued
+ * for another analysis. This results in a merge operation for the successors of the
+ * instruction.
+ *
+ * - The number of local variables. The hot spot of analysis is frame merging. The merge function
+ * iterates through the values in the frame (locals and stack values) and merges them.
+ *
+ * I measured the running time of an analysis for two examples:
+ * - Keep the number of locals and branching instructions constant, increase the number of
+ * instructions. The running time grows linearly with the method size.
+ * - Increase the size and number of locals in a method. The method size and number of locals
+ * grow in the same pace. Here, the running time increase is polynomial. It looks like the
+ * complexity is be #instructions * #locals^2 (see below).
+ *
+ * I measured nullness analysis (which tracks aliases) and a SimpleValue analysis. Nullness runs
+ * roughly 5x slower (because of alias tracking) at every problem size - this factor doesn't change.
+ *
+ * The numbers below are for nullness. Note that the last column is constant, i.e., the running
+ * time is proportional to #ins * #loc^2. Therefore we use this factor when limiting the maximal
+ * method size for running an analysis.
+ *
+ * #insns #locals time (ms) time / #ins * #loc^2 * 10^6
+ * 1305 156 34 1.07
+ * 2610 311 165 0.65
+ * 3915 466 490 0.57
+ * 5220 621 1200 0.59
+ * 6525 776 2220 0.56
+ * 7830 931 3830 0.56
+ * 9135 1086 6570 0.60
+ * 10440 1241 9700 0.60
+ * 11745 1396 13800 0.60
+ *
+ * As a second experiment, nullness analysis was run with varying #insns but constant #locals.
+ * The last column shows linear complexity with respect to the method size (linearOffset = 2279):
+ *
+ * #insns #locals time (ms) (time + linearOffset) / #insns
+ * 5220 621 1090 0.645
+ * 6224 621 1690 0.637
+ * 7226 621 2280 0.630
+ * 8228 621 2870 0.625
+ * 9230 621 3530 0.629
+ * 10232 621 4130 0.626
+ * 11234 621 4770 0.627
+ * 12236 621 5520 0.637
+ * 13238 621 6170 0.638
+ *
+ *
+ * When running a BasicValue analysis, the complexity observation is the same (time is proportional
+ * to #ins * #loc^2).
+ *
+ *
+ * Measuring analysis execution time
+ * ---------------------------------
+ *
+ * See code below.
+ */
+
+/*
+object Test {
+ val overwrite: Option[String] = null
+
+ @noinline def serialize(o: AnyRef): String = null
+
+ @noinline def deserialize(string: String): AnyRef = null
+
+ @inline def checkRoundTrip[T <: AnyRef](instance: T)(f: T => AnyRef) {
+ val result = serialize(instance)
+ val reconstituted = deserialize(result).asInstanceOf[T]
+ assert(f(instance) == f(reconstituted), (f(instance), f(reconstituted)))
+ }
+
+ @inline def check[T <: AnyRef](instance: => T)(prevResult: String, f: T => AnyRef = (x: T) => x) {
+ // pattern match to introduce a lot of control flow, i.e., a lot of frame merges
+ overwrite match {
+ case Some(f) =>
+ case None =>
+ checkRoundTrip(instance)(f)
+ assert(f(deserialize(prevResult).asInstanceOf[T]) == f(instance), instance)
+ assert(prevResult == "res", instance)
+ }
+ }
+
+ // @inline def fun[T <: AnyRef](instance: => T) = (x: T) => x
+
+ def testMain(): Unit = {
+ // every call to check creates quite a number of locals, and also quite a number of aliases
+ // of the same value (x1). First of all, the default argument call is expanded as below. Then
+ // method check is inlined, and within the body of check, checkRoundTrip and assert have
+ // already been inlined as well.
+
+ // {
+ // val x1 = () => ""
+ // val x2 = fun(x1()) // the compiler optimizes this: instead of passing `() => x1()`, it just passes x1
+ // check(x1())("", x2) // same here for x1
+ // }
+
+ check("")("")
+ check("")("")
+ check("")("")
+ check("")("")
+ check("")("") // 5
+ check("")("")
+ check("")("")
+ check("")("")
+ check("")("")
+ check("")("") // 10
+ check("")("")
+ check("")("")
+ check("")("")
+ check("")("")
+ check("")("") // 15
+ check("")("")
+ check("")("")
+ check("")("")
+ check("")("")
+ check("")("") // 20
+ check("")("")
+ check("")("")
+ check("")("")
+ check("")("")
+ check("")("") // 25
+ check("")("")
+ check("")("")
+ check("")("")
+ check("")("")
+ check("")("") // 30
+ check("")("")
+ check("")("")
+ check("")("")
+ check("")("")
+ check("")("") // 35
+ check("")("")
+ check("")("")
+ check("")("")
+ check("")("")
+ check("")("") // 40
+ // check("")("")
+ // check("")("")
+ // check("")("")
+ // check("")("")
+ // check("")("") // 45
+ // check("")("")
+ // check("")("")
+ // check("")("")
+ // check("")("")
+ // check("")("") // 50
+ // check("")("")
+ // check("")("")
+ // check("")("")
+ // check("")("")
+ // check("")("") // 55
+
+ // 1000 bytecode instructions, 0 locals
+ // println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10));
+ }
+
+ def timed[T](f: => T): T = {
+ val start = System.nanoTime()
+ val r = f
+ val nanos = System.nanoTime() - start
+ println(s"took ${nanos/1000000}ms")
+ r
+ }
+
+ def main(args: Array[String]): Unit = {
+ import scala.tools.nsc.backend.jvm._
+ val cn = AsmUtils.readClass("/Users/luc/scala/scala/sandbox/Test$.class")
+ import scala.collection.convert.decorateAsScala._
+ val m = cn.methods.iterator.asScala.find(_.name == "testMain").head
+
+ println(s"${m.instructions.size} instructions - ${m.maxLocals} locals")
+
+ val a = new analysis.NullnessAnalyzer
+ a.analyze(cn.name, m) // warm up
+
+ analysis.AliasingFrame.reset()
+ timed(a.analyze(cn.name, m))
+ analysis.AliasingFrame.timers foreach println
+
+ println("---")
+
+ // NOTE: if we don't run nullness analysis above (comment it out), then the BasicValue
+ // analysis runs 3.5x faster. Most likely because the call to Interpreter.merge inside
+ // Frame.merge is no longer megamorphic.
+
+ import scala.tools.asm.tree.analysis._
+ val ba = new Analyzer(new BasicInterpreter)
+ ba.analyze(cn.name, m) // warm up
+
+ timed(ba.analyze(cn.name, m))
+
+ println("---")
+
+ timed(a.analyze(cn.name, m))
+ }
+}
+*/
+package object analysis