diff options
Diffstat (limited to 'src/compiler/scala/tools/nsc/backend/jvm/analysis')
-rw-r--r-- | src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala | 556 | ||||
-rw-r--r-- | src/compiler/scala/tools/nsc/backend/jvm/analysis/BackendUtils.scala | 508 | ||||
-rw-r--r-- | src/compiler/scala/tools/nsc/backend/jvm/analysis/InstructionStackEffect.scala | 273 | ||||
-rw-r--r-- | src/compiler/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzer.scala | 181 | ||||
-rw-r--r-- | src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzerImpl.scala (renamed from src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzer.scala) | 80 | ||||
-rw-r--r-- | src/compiler/scala/tools/nsc/backend/jvm/analysis/TypeFlowInterpreter.scala | 36 | ||||
-rw-r--r-- | src/compiler/scala/tools/nsc/backend/jvm/analysis/package.scala | 374 |
7 files changed, 1669 insertions, 339 deletions
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala index 7bbe1e2a49..086946e4e3 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala @@ -3,17 +3,22 @@ package backend.jvm package analysis import scala.annotation.switch -import scala.collection.{mutable, immutable} +import scala.collection.mutable import scala.tools.asm.Opcodes import scala.tools.asm.tree._ import scala.tools.asm.tree.analysis.{Analyzer, Value, Frame, Interpreter} import opt.BytecodeUtils._ +import AliasSet.SmallBitSet -object AliasingFrame { - private var _idCounter: Long = 0l - private def nextId = { _idCounter += 1; _idCounter } -} - +/** + * A subclass of Frame that tracks aliasing of values stored in local variables and on the stack. + * + * Note: an analysis tracking aliases is roughly 5x slower than a usual analysis (assuming a simple + * value domain with a fast merge function). For example, nullness analysis is roughly 5x slower + * than a BasicValue analysis. + * + * See the doc of package object `analysis` for some notes on the performance of alias analysis. + */ class AliasingFrame[V <: Value](nLocals: Int, nStack: Int) extends Frame[V](nLocals, nStack) { import Opcodes._ @@ -23,63 +28,80 @@ class AliasingFrame[V <: Value](nLocals: Int, nStack: Int) extends Frame[V](nLoc init(src) } - /** - * For each slot (entry in the `values` array of the frame), an id that uniquely represents - * the object stored in it. If two values have the same id, they are aliases of the same - * object. - */ - private val aliasIds: Array[Long] = Array.fill(nLocals + nStack)(AliasingFrame.nextId) + override def toString: String = super.toString + " - " + aliases.toList.filter(s => s != null && s.size > 1).map(_.toString).distinct.mkString(",") /** - * The object alias id of for a value index. 
- */ - def aliasId(entry: Int) = aliasIds(entry) - - /** - * Returns the indices of the values array which are aliases of the object `id`. + * For every value the set of values that are aliases of it. + * + * Invariants: + * - If `aliases(i) == null` then i has no aliases. This is equivalent to having + * `aliases(i) == SingletonSet(i)`. + * - If `aliases(i) != null` then `aliases(i) contains i`. + * - If `aliases(i) contains j` then `aliases(i) eq aliases(j)`, i.e., they are references to the + * same (mutable) AliasSet. */ - def valuesWithAliasId(id: Long): Set[Int] = immutable.BitSet.empty ++ aliasIds.indices.iterator.filter(i => aliasId(i) == id) + val aliases: Array[AliasSet] = new Array[AliasSet](getLocals + getMaxStackSize) /** * The set of aliased values for a given entry in the `values` array. */ - def aliasesOf(entry: Int): Set[Int] = valuesWithAliasId(aliasIds(entry)) + def aliasesOf(entry: Int): AliasSet = { + if (aliases(entry) != null) aliases(entry) + else { + val init = new AliasSet(new AliasSet.SmallBitSet(entry, -1, -1, -1), 1) + aliases(entry) = init + init + } + } /** - * Define a new alias. For example, given - * var a = this // this, a have the same aliasId - * then an assignment + * Define a new alias. For example, an assignment * b = a - * will set the same the aliasId for `b`. + * adds b to the set of aliases of a. */ private def newAlias(assignee: Int, source: Int): Unit = { - aliasIds(assignee) = aliasIds(source) + removeAlias(assignee) + val sourceAliases = aliasesOf(source) + sourceAliases += assignee + aliases(assignee) = sourceAliases } /** - * An assignment + * Remove an alias. For example, an assignment * a = someUnknownValue() - * sets a fresh alias id for `a`. - * A stack value is also removed from its alias set when being consumed. + * removes a from its former alias set. + * As another example, stack values are removed from their alias sets when being consumed. 
*/ private def removeAlias(assignee: Int): Unit = { - aliasIds(assignee) = AliasingFrame.nextId + if (aliases(assignee) != null) { + aliases(assignee) -= assignee + aliases(assignee) = null + } + } + + /** + * Define the alias set for a given value. + */ + private def setAliasSet(assignee: Int, set: AliasSet): Unit = { + if (aliases(assignee) != null) { + aliases(assignee) -= assignee + } + aliases(assignee) = set } override def execute(insn: AbstractInsnNode, interpreter: Interpreter[V]): Unit = { - // Make the extendsion methods easier to use (otherwise we have to repeat `this`.stackTop) + // Make the extension methods easier to use (otherwise we have to repeat `this`.stackTop) def stackTop: Int = this.stackTop def peekStack(n: Int): V = this.peekStack(n) - // the val pattern `val (p, c) = f` still allocates a tuple (https://github.com/scala-opt/scala/issues/28) - val prodCons = InstructionStackEffect(insn, this) // needs to be called before super.execute, see its doc - val consumed = prodCons._1 - val produced = prodCons._2 + val prodCons = InstructionStackEffect.forAsmAnalysis(insn, this) // needs to be called before super.execute, see its doc + val consumed = InstructionStackEffect.cons(prodCons) + val produced = InstructionStackEffect.prod(prodCons) super.execute(insn, interpreter) (insn.getOpcode: @switch) match { - case ALOAD => + case ILOAD | LLOAD | FLOAD | DLOAD | ALOAD => newAlias(assignee = stackTop, source = insn.asInstanceOf[VarInsnNode].`var`) case DUP => @@ -166,31 +188,54 @@ class AliasingFrame[V <: Value](nLocals: Int, nStack: Int) extends Frame[V](nLoc } case SWAP => + // could be written more elegantly with higher-order combinators, but thinking of performance val top = stackTop - val idTop = aliasIds(top) - aliasIds(top) = aliasIds(top - 1) - aliasIds(top - 1) = idTop - case opcode => - if (opcode == ASTORE) { - // Not a separate case because we need to remove the consumed stack value from alias sets after. 
- val stackTopBefore = stackTop - produced + consumed - val local = insn.asInstanceOf[VarInsnNode].`var` - newAlias(assignee = local, source = stackTopBefore) - // if the value written is size 2, it overwrites the subsequent slot, which is then no - // longer an alias of anything. see the corresponding case in `Frame.execute`. - if (getLocal(local).getSize == 2) - removeAlias(local + 1) - - // if the value at the preceding index is size 2, it is no longer valid, so we remove its - // aliasing. see corresponding case in `Frame.execute` - if (local > 0) { - val precedingValue = getLocal(local - 1) - if (precedingValue != null && precedingValue.getSize == 2) - removeAlias(local - 1) + def moveNextToTop(): Unit = { + val nextAliases = aliases(top - 1) + aliases(top) = nextAliases + nextAliases -= (top - 1) + nextAliases += top + } + + if (aliases(top) != null) { + val topAliases = aliases(top) + if (aliases(top - 1) != null) moveNextToTop() + else aliases(top) = null + // move top to next + aliases(top - 1) = topAliases + topAliases -= top + topAliases += (top - 1) + } else { + if (aliases(top - 1) != null) { + moveNextToTop() + aliases(top - 1) = null } } + case opcode => + (opcode: @switch) match { + case ISTORE | LSTORE | FSTORE | DSTORE | ASTORE => + // not a separate case: we re-use the code below that removes the consumed stack value from alias sets + val stackTopBefore = stackTop - produced + consumed + val local = insn.asInstanceOf[VarInsnNode].`var` + newAlias(assignee = local, source = stackTopBefore) + // if the value written is size 2, it overwrites the subsequent slot, which is then no + // longer an alias of anything. see the corresponding case in `Frame.execute`. + if (getLocal(local).getSize == 2) + removeAlias(local + 1) + + // if the value at the preceding index is size 2, it is no longer valid, so we remove its + // aliasing. 
see corresponding case in `Frame.execute` + if (local > 0) { + val precedingValue = getLocal(local - 1) + if (precedingValue != null && precedingValue.getSize == 2) + removeAlias(local - 1) + } + + case _ => + } + // Remove consumed stack values from aliasing sets. // Example: iadd // - before: local1, local2, stack1, consumed1, consumed2 @@ -198,10 +243,22 @@ class AliasingFrame[V <: Value](nLocals: Int, nStack: Int) extends Frame[V](nLoc val firstConsumed = stackTop - produced + 1 // firstConsumed = 3 for (i <- 0 until consumed) removeAlias(firstConsumed + i) // remove aliases for 3 and 4 + } + } - // We don't need to set the aliases ids for the produced values: the aliasIds array already - // contains fresh ids for non-used stack values (ensured by removeAlias). + /** + * When entering an exception handler, all values are dropped from the stack (and the exception + * value is pushed). The ASM analyzer invokes `firstHandlerInstructionFrame.clearStack()`. To + * ensure consistent aliasing sets, we need to remove the dropped values from aliasing sets. + */ + override def clearStack(): Unit = { + var i = getLocals + val end = i + getStackSize + while (i < end) { + removeAlias(i) + i += 1 } + super.clearStack() } /** @@ -217,30 +274,131 @@ class AliasingFrame[V <: Value](nLocals: Int, nStack: Int) extends Frame[V](nLoc * x = a * y = b // (x, a) and (y, b) * } - * [...] // (x, a) + * [...] // (x, a) -- merge of ((x, y, a)) and ((x, a), (y, b)) */ override def merge(other: Frame[_ <: V], interpreter: Interpreter[V]): Boolean = { + // merge is the main performance hot spot of a data flow analysis. + + // in nullness analysis, super.merge (which actually merges the nullness values) takes 20% of + // the overall analysis time. val valuesChanged = super.merge(other, interpreter) + + // in nullness analysis, merging the alias sets takes ~55% of the analysis time. therefore, this + // code has been heavily optimized. 
most of the time is spent in the `hasNext` method of the + // andNotIterator, see its comment. + var aliasesChanged = false val aliasingOther = other.asInstanceOf[AliasingFrame[_]] - for (i <- aliasIds.indices) { - val thisAliases = aliasesOf(i) - val thisNotOther = thisAliases diff (thisAliases intersect aliasingOther.aliasesOf(i)) - if (thisNotOther.nonEmpty) { - aliasesChanged = true - thisNotOther foreach removeAlias + + val numValues = getLocals + getStackSize + // assume (a, b) are aliases both in this frame, and the other frame. when merging the alias set + // for a, we already see that a and b will be aliases in the final result. so we can skip over + // merging the alias set for b. in this case, while merging the sets for a, knownOk(b) will be + // set to `true`. + val knownOk = new Array[Boolean](numValues) + var i = 0 + while (i < numValues) { + if (!knownOk(i)) { + val thisAliases = this.aliases(i) + val otherAliases = aliasingOther.aliases(i) + if (thisAliases != null) { + if (otherAliases == null) { + if (thisAliases.size > 1) { + aliasesChanged = true + removeAlias(i) + } + } else { + // The iterator yields elements that are in `thisAliases` but not in `otherAliases`. + // As a side-effect, for every index `i` that is in both alias sets, the iterator sets + // `knownOk(i) = true`: the alias sets for these values don't need to be merged again. 
+ val thisNotOtherIt = AliasSet.andNotIterator(thisAliases, otherAliases, knownOk) + if (thisNotOtherIt.hasNext) { + aliasesChanged = true + val newSet = AliasSet.empty + while (thisNotOtherIt.hasNext) { + val next = thisNotOtherIt.next() + newSet += next + setAliasSet(next, newSet) + } + } + } + } } + i += 1 } + valuesChanged || aliasesChanged } + private def min(s: SmallBitSet) = { + var r = s.a + if ( s.b < r) r = s.b + if (s.c != -1 && s.c < r) r = s.c + if (s.d != -1 && s.d < r) r = s.d + r + } + override def init(src: Frame[_ <: V]): Frame[V] = { - super.init(src) - compat.Platform.arraycopy(src.asInstanceOf[AliasingFrame[_]].aliasIds, 0, aliasIds, 0, aliasIds.length) + super.init(src) // very quick (just an arraycopy) + System.arraycopy(src.asInstanceOf[AliasingFrame[_]].aliases, 0, aliases, 0, aliases.length) // also quick + + val newSets = mutable.HashMap.empty[AliasSet, AliasSet] + + // the rest of this method (cloning alias sets) is the second performance hotspot (next to + // AliasingFrame.merge). for nullness, it takes ~20% of the analysis time. + // the difficulty here is that we have to clone the alias sets correctly. if two values a, b are + // aliases, then aliases(a) eq aliases(b). we need to make sure to use the same clone for the + // two values. + + var i = 0 + while (i < aliases.length) { + val set = aliases(i) + if (set != null) { + // size cannot be 0 - alias sets are always at least singletons. 
+ // for sets of size 1-4, don't use the `newSets` map - lookup / update is slow + if (set.size == 1) { + aliases(i) = null + } else if (set.size <= 4) { + val small = set.set.asInstanceOf[AliasSet.SmallBitSet] + val firstOfSet = i == min(small) + if (firstOfSet) { + val newSet = set.clone() + aliases(small.a) = newSet + aliases(small.b) = newSet + if (small.c != -1) aliases(small.c) = newSet + if (small.d != -1) aliases(small.d) = newSet + } + } else { + // the actual hot spot is the hash map operations here: this is where almost all of the 20% + // mentioned above is spent. + // i also benchmarked an alternative implementation: keep an array of booleans for indexes + // that already contain the cloned set. iterate through all elements of the cloned set and + // assign the cloned set. this approach is 50% slower than using a hash map. + if (newSets contains set) aliases(i) = newSets(set) + else { + val newSet = set.clone() + newSets(set) = newSet + aliases(i) = newSet + } + } + } + i += 1 + } this } } +object AliasingFrame { +// val start1 = AliasingFrame.timer1.start() +// AliasingFrame.timer1.stop(start1) + import scala.reflect.internal.util.Statistics._ + val timer1 = newTimer("t1", "jvm") + val timer2 = newTimer("t2", "jvm") + val timer3 = newTimer("t3", "jvm") + val timers = List(timer1, timer2, timer3) + def reset(): Unit = for (t <- timers) { t.nanos = 0; t.timings = 0 } +} + /** * An analyzer that uses AliasingFrames instead of bare Frames. This can be used when an analysis * needs to track aliases, but doesn't require a more specific Frame subclass. @@ -249,3 +407,269 @@ class AliasingAnalyzer[V <: Value](interpreter: Interpreter[V]) extends Analyzer override def newFrame(nLocals: Int, nStack: Int): AliasingFrame[V] = new AliasingFrame(nLocals, nStack) override def newFrame(src: Frame[_ <: V]): AliasingFrame[V] = new AliasingFrame(src) } + +/** + * An iterator over Int (required to prevent boxing the result of next). 
+ */ +abstract class IntIterator extends Iterator[Int] { + def hasNext: Boolean + def next(): Int +} + +/** + * An efficient mutable bit set. + * + * @param set Either a SmallBitSet or an Array[Long] + * @param size The size of the set, useful for performance of certain operations + */ +class AliasSet(var set: Object /*SmallBitSet | Array[Long]*/, var size: Int) { + import AliasSet._ + + override def toString: String = iterator.toSet.mkString("<", ",", ">") + + /** + * An iterator for the elements of this bit set. Note that only one iterator can be used at a + * time. Also make sure not to change the underlying AliasSet during iteration. + */ + def iterator: IntIterator = andNotIterator(this, empty, null) + + def +=(value: Int): Unit = this.set match { + case s: SmallBitSet => (size: @switch) match { + case 0 => s.a = value; size = 1 + case 1 => if (value != s.a) { s.b = value; size = 2 } + case 2 => if (value != s.a && value != s.b) { s.c = value; size = 3 } + case 3 => if (value != s.a && value != s.b && value != s.c) { s.d = value; size = 4 } + case 4 => + if (value != s.a && value != s.b && value != s.c && value != s.d) { + this.set = bsEmpty + this.size = 0 + bsAdd(this, s.a) + bsAdd(this, s.b) + bsAdd(this, s.c) + bsAdd(this, s.d) + bsAdd(this, value) + } + } + case bits: Array[Long] => + bsAdd(this, value) + } + + def -=(value: Int): Unit = this.set match { + case s: SmallBitSet => (size: @switch) match { + case 0 => + case 1 => + if (value == s.a) { s.a = -1; size = 0 } + case 2 => + if (value == s.a) { s.a = s.b; s.b = -1; size = 1 } + else if (value == s.b) { s.b = -1; size = 1 } + case 3 => + if (value == s.a) { s.a = s.b; s.b = s.c; s.c = -1; size = 2 } + else if (value == s.b) { s.b = s.c; s.c = -1; size = 2 } + else if (value == s.c) { s.c = -1; size = 2 } + case 4 => + if (value == s.a) { s.a = s.b; s.b = s.c; s.c = s.d; s.d = -1; size = 3 } + else if (value == s.b) { s.b = s.c; s.c = s.d; s.d = -1; size = 3 } + else if (value == s.c) { s.c = s.d; 
s.d = -1; size = 3 } + else if (value == s.d) { s.d = -1; size = 3 } + } + case bits: Array[Long] => + bsRemove(this, value) + if (this.size == 4) + this.set = bsToSmall(this.set.asInstanceOf[Array[Long]]) + } + + override def clone(): AliasSet = { + val resSet = this.set match { + case s: SmallBitSet => new SmallBitSet(s.a, s.b, s.c, s.d) + case bits: Array[Long] => bits.clone() + } + new AliasSet(resSet, this.size) + } +} + +object AliasSet { + def empty = new AliasSet(new SmallBitSet(-1, -1, -1, -1), 0) + + final class SmallBitSet(var a: Int, var b: Int, var c: Int, var d: Int) { + override def toString = s"($a, $b, $c, $d)" + } + + def bsEmpty: Array[Long] = new Array[Long](1) + + private def bsEnsureCapacity(set: Array[Long], index: Int): Array[Long] = { + if (index < set.length) set + else { + var newLength = set.length + while (index >= newLength) newLength *= 2 + val newSet = new Array[Long](newLength) + Array.copy(set, 0, newSet, 0, set.length) + newSet + } + } + + def bsAdd(set: AliasSet, bit: Int): Unit = { + val bits = set.set.asInstanceOf[Array[Long]] + val index = bit >> 6 + val resSet = bsEnsureCapacity(bits, index) + val before = resSet(index) + val result = before | (1l << bit) + if (result != before) { + resSet(index) = result + set.set = resSet + set.size += 1 + } + } + + def bsRemove(set: AliasSet, bit: Int): Unit = { + val bits = set.set.asInstanceOf[Array[Long]] + val index = bit >> 6 + if (index < bits.length) { + val before = bits(index) + val result = before & ~(1l << bit) + if (result != before) { + bits(index) = result + set.size -= 1 + } + } + } + + def bsContains(set: Array[Long], bit: Int): Boolean = { + val index = bit >> 6 + bit >= 0 && index < set.length && (set(index) & (1L << bit)) != 0L + } + +// var sizesHist: Array[Int] = new Array[Int](1000) + + /** + * Convert a bit array to a SmallBitSet. Requires the bit array to contain exactly four bits. 
+ */ + def bsToSmall(bits: Array[Long]): SmallBitSet = { + var a = -1 + var b = -1 + var c = -1 + var i = 0 + val end = bits.length * 64 + while (i < end) { + if (bsContains(bits, i)) { + if (a == -1) a = i + else if (b == -1) b = i + else if (c == -1) c = i + else return new SmallBitSet(a, b, c, i) + } + i += 1 + } + null + } + + /** + * An iterator that yields the elements that are in one bit set and not in another (&~). + */ + private class AndNotIt(setA: AliasSet, setB: AliasSet, thisAndOther: Array[Boolean]) extends IntIterator { + // values in the first bit set + private var a, b, c, d = -1 + private var xs: Array[Long] = null + + // values in the second bit set + private var notA, notB, notC, notD = -1 + private var notXs: Array[Long] = null + + // holds the next value of `a`, `b`, `c` or `d` that should be returned. assigned in hasNext + private var abcdNext = -1 + + // counts through elements in the `xs` bit set + private var i = 0 + // true if the current value of `i` should be returned by this iterator + private var iValid = false + + setA.set match { + case s: SmallBitSet => a = s.a; b = s.b; c = s.c; d = s.d + case bits: Array[Long] => xs = bits + } + + setB.set match { + case s: SmallBitSet => notA = s.a; notB = s.b; notC = s.c; notD = s.d + case bits: Array[Long] => notXs = bits + } + + // for each value that exists both in this AND (&) the other bit set, `thisAndOther` is set to true. + // hacky side-effect, used for performance of AliasingFrame.merge. + private def setThisAndOther(x: Int) = if (thisAndOther != null) thisAndOther(x) = true + + private def checkABCD(x: Int, num: Int): Boolean = { + // assert(x == a && num == 1 || x == b && num == 2 || ...) 
+ x != -1 && { + val otherHasA = x == notA || x == notB || x == notC || x == notD || (notXs != null && bsContains(notXs, x)) + if (otherHasA) setThisAndOther(x) + else abcdNext = x + (num: @switch) match { + case 1 => a = -1 + case 2 => b = -1 + case 3 => c = -1 + case 4 => d = -1 + } + !otherHasA + } + } + + // main performance hot spot + private def checkXs = { + (xs != null) && { + val end = xs.length * 64 + + while (i < end && { + val index = i >> 6 + if (xs(index) == 0l) { // boom. for nullness, this saves 35% of the overall analysis time. + i = ((index + 1) << 6) - 1 // -1 required because i is incremented in the loop body + true + } else { + val mask = 1l << i + // if (mask > xs(index)) we could also advance i to the next value, but that didn't pay off in benchmarks + val thisHasI = (xs(index) & mask) != 0l + !thisHasI || { + val otherHasI = i == notA || i == notB || i == notC || i == notD || (notXs != null && index < notXs.length && (notXs(index) & mask) != 0l) + if (otherHasI) setThisAndOther(i) + otherHasI + } + } + }) i += 1 + + iValid = i < end + iValid + } + } + + // this is the main hot spot of alias analysis. for nullness, 38% of the overall analysis time + // is spent here. within hasNext, almost the entire time is spent in `checkXs`. + // + def hasNext: Boolean = iValid || abcdNext != -1 || checkABCD(a, 1) || checkABCD(b, 2) || checkABCD(c, 3) || checkABCD(d, 4) || checkXs + + def next(): Int = { + if (hasNext) { + if (abcdNext != -1) { + val r = abcdNext; abcdNext = -1; r + } else { + val r = i; i += 1; iValid = false; r + } + } else Iterator.empty.next() + } + } + +// The number of bits in a bit array. Useful for debugging. +// def bsSize(bits: Array[Long]) = { +// var r = 0 +// var i = 0 +// while (i < bits.length) { +// r += java.lang.Long.bitCount(bits(i)) +// i += 1 +// } +// r +// } + + /** + * An iterator returning the elements in a that are not also in b (a &~ b). 
+ * + * If `thisAndOther` is non-null, the iterator sets thisAndOther(i) to true for every value that + * is both in a and b (&). + */ + def andNotIterator(a: AliasSet, b: AliasSet, thisAndOther: Array[Boolean]): IntIterator = new AndNotIt(a, b, thisAndOther) +} diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/BackendUtils.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/BackendUtils.scala new file mode 100644 index 0000000000..f94642389d --- /dev/null +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/BackendUtils.scala @@ -0,0 +1,508 @@ +package scala.tools.nsc +package backend.jvm +package analysis + +import scala.annotation.switch +import scala.tools.asm.{Handle, Type} +import scala.tools.asm.Opcodes._ +import scala.tools.asm.tree._ +import scala.tools.asm.tree.analysis.{Frame, BasicInterpreter, Analyzer, Value} +import GenBCode._ +import scala.tools.nsc.backend.jvm.BTypes._ +import scala.tools.nsc.backend.jvm.opt.BytecodeUtils._ +import java.lang.invoke.LambdaMetafactory +import scala.collection.mutable +import scala.collection.JavaConverters._ + +/** + * This component hosts tools and utilities used in the backend that require access to a `BTypes` + * instance. + * + * One example is the AsmAnalyzer class, which runs `computeMaxLocalsMaxStack` on the methodNode to + * be analyzed. This method in turn lives inside the BTypes assembly because it queries the per-run + * cache `maxLocalsMaxStackComputed` defined in there. + */ +class BackendUtils[BT <: BTypes](val btypes: BT) { + import btypes._ + import btypes.coreBTypes._ + import callGraph.ClosureInstantiation + + /** + * A wrapper to make ASM's Analyzer a bit easier to use. 
+ */ + class AsmAnalyzer[V <: Value](methodNode: MethodNode, classInternalName: InternalName, val analyzer: Analyzer[V] = new Analyzer(new BasicInterpreter)) { + computeMaxLocalsMaxStack(methodNode) + analyzer.analyze(classInternalName, methodNode) + def frameAt(instruction: AbstractInsnNode): Frame[V] = analyzer.frameAt(instruction, methodNode) + } + + /** + * See the doc comment on package object `analysis` for a discussion on performance. + */ + object AsmAnalyzer { + // jvm limit is 65535 for both number of instructions and number of locals + + private def size(method: MethodNode) = method.instructions.size.toLong * method.maxLocals * method.maxLocals + + // with the limits below, analysis should not take more than one second + + private val nullnessSizeLimit = 5000l * 600l * 600l // 5000 insns, 600 locals + private val basicValueSizeLimit = 9000l * 1000l * 1000l + private val sourceValueSizeLimit = 8000l * 950l * 950l + + def sizeOKForAliasing(method: MethodNode): Boolean = size(method) < nullnessSizeLimit + def sizeOKForNullness(method: MethodNode): Boolean = size(method) < nullnessSizeLimit + def sizeOKForBasicValue(method: MethodNode): Boolean = size(method) < basicValueSizeLimit + def sizeOKForSourceValue(method: MethodNode): Boolean = size(method) < sourceValueSizeLimit + } + + class ProdConsAnalyzer(val methodNode: MethodNode, classInternalName: InternalName) extends AsmAnalyzer(methodNode, classInternalName, new Analyzer(new InitialProducerSourceInterpreter)) with ProdConsAnalyzerImpl + + class NonLubbingTypeFlowAnalyzer(val methodNode: MethodNode, classInternalName: InternalName) extends AsmAnalyzer(methodNode, classInternalName, new Analyzer(new NonLubbingTypeFlowInterpreter)) + + /** + * Add: + * private static Object $deserializeLambda$(SerializedLambda l) { + * return indy[scala.runtime.LambdaDeserialize.bootstrap](l) + * } + * + * We use invokedynamic here to enable caching within the deserializer without needing to + * host a static field in the 
enclosing class. This allows us to add this method to interfaces + * that define lambdas in default methods. + */ + def addLambdaDeserialize(classNode: ClassNode): Unit = { + val cw = classNode + + // Make sure that the ClassBTypes of all types that are used in the code generated + // here (e.g. java/util/Map) are initialized. Initializing a ClassBType adds it to the + // `classBTypeFromInternalName` map. When writing the classfile, the asm ClassWriter computes + // stack map frames and invokes the `getCommonSuperClass` method. This method expects all + // ClassBTypes mentioned in the source code to exist in the map. + + val nilLookupDesc = MethodBType(Nil, jliMethodHandlesLookupRef).descriptor + val serlamObjDesc = MethodBType(jliSerializedLambdaRef :: Nil, ObjectRef).descriptor + + { + val mv = cw.visitMethod(ACC_PRIVATE + ACC_STATIC + ACC_SYNTHETIC, "$deserializeLambda$", serlamObjDesc, null, null) + mv.visitCode() + mv.visitVarInsn(ALOAD, 0) + mv.visitInvokeDynamicInsn("lambdaDeserialize", serlamObjDesc, lambdaDeserializeBootstrapHandle) + mv.visitInsn(ARETURN) + mv.visitEnd() + } + } + + /** + * Clone the instructions in `methodNode` into a new [[InsnList]], mapping labels according to + * the `labelMap`. Returns the new instruction list and a map from old to new instructions, and + * a boolean indicating if the instruction list contains an instantiation of a serializable SAM + * type. 
+ */ + def cloneInstructions(methodNode: MethodNode, labelMap: Map[LabelNode, LabelNode]): (InsnList, Map[AbstractInsnNode, AbstractInsnNode], Boolean) = { + val javaLabelMap = labelMap.asJava + val result = new InsnList + var map = Map.empty[AbstractInsnNode, AbstractInsnNode] + var hasSerializableClosureInstantiation = false + for (ins <- methodNode.instructions.iterator.asScala) { + if (!hasSerializableClosureInstantiation) ins match { + case callGraph.LambdaMetaFactoryCall(indy, _, _, _) => indy.bsmArgs match { + case Array(_, _, _, flags: Integer, xs@_*) if (flags.intValue & LambdaMetafactory.FLAG_SERIALIZABLE) != 0 => + hasSerializableClosureInstantiation = true + case _ => + } + case _ => + } + val cloned = ins.clone(javaLabelMap) + result add cloned + map += ((ins, cloned)) + } + (result, map, hasSerializableClosureInstantiation) + } + + def getBoxedUnit: FieldInsnNode = new FieldInsnNode(GETSTATIC, srBoxedUnitRef.internalName, "UNIT", srBoxedUnitRef.descriptor) + + private val anonfunAdaptedName = """.*\$anonfun\$\d+\$adapted""".r + def hasAdaptedImplMethod(closureInit: ClosureInstantiation): Boolean = { + isBuiltinFunctionType(Type.getReturnType(closureInit.lambdaMetaFactoryCall.indy.desc).getInternalName) && + anonfunAdaptedName.pattern.matcher(closureInit.lambdaMetaFactoryCall.implMethod.getName).matches + } + + private def primitiveAsmTypeToBType(primitiveType: Type): PrimitiveBType = (primitiveType.getSort: @switch) match { + case Type.BOOLEAN => BOOL + case Type.BYTE => BYTE + case Type.CHAR => CHAR + case Type.SHORT => SHORT + case Type.INT => INT + case Type.LONG => LONG + case Type.FLOAT => FLOAT + case Type.DOUBLE => DOUBLE + case _ => null + } + + def isScalaBox(insn: MethodInsnNode): Boolean = { + insn.owner == srBoxesRunTimeRef.internalName && { + val args = Type.getArgumentTypes(insn.desc) + args.length == 1 && (srBoxesRuntimeBoxToMethods.get(primitiveAsmTypeToBType(args(0))) match { + case Some(MethodNameAndType(name, tp)) => name == 
insn.name && tp.descriptor == insn.desc + case _ => false + }) + } + } + + def getScalaBox(primitiveType: Type): MethodInsnNode = { + val bType = primitiveAsmTypeToBType(primitiveType) + val MethodNameAndType(name, methodBType) = srBoxesRuntimeBoxToMethods(bType) + new MethodInsnNode(INVOKESTATIC, srBoxesRunTimeRef.internalName, name, methodBType.descriptor, /*itf =*/ false) + } + + def isScalaUnbox(insn: MethodInsnNode): Boolean = { + insn.owner == srBoxesRunTimeRef.internalName && (srBoxesRuntimeUnboxToMethods.get(primitiveAsmTypeToBType(Type.getReturnType(insn.desc))) match { + case Some(MethodNameAndType(name, tp)) => name == insn.name && tp.descriptor == insn.desc + case _ => false + }) + } + + def getScalaUnbox(primitiveType: Type): MethodInsnNode = { + val bType = primitiveAsmTypeToBType(primitiveType) + val MethodNameAndType(name, methodBType) = srBoxesRuntimeUnboxToMethods(bType) + new MethodInsnNode(INVOKESTATIC, srBoxesRunTimeRef.internalName, name, methodBType.descriptor, /*itf =*/ false) + } + + private def calleeInMap(insn: MethodInsnNode, map: Map[InternalName, MethodNameAndType]): Boolean = map.get(insn.owner) match { + case Some(MethodNameAndType(name, tp)) => insn.name == name && insn.desc == tp.descriptor + case _ => false + } + + def isJavaBox(insn: MethodInsnNode): Boolean = calleeInMap(insn, javaBoxMethods) + def isJavaUnbox(insn: MethodInsnNode): Boolean = calleeInMap(insn, javaUnboxMethods) + + def isPredefAutoBox(insn: MethodInsnNode): Boolean = { + insn.owner == PredefRef.internalName && (predefAutoBoxMethods.get(insn.name) match { + case Some(tp) => insn.desc == tp.descriptor + case _ => false + }) + } + + def isPredefAutoUnbox(insn: MethodInsnNode): Boolean = { + insn.owner == PredefRef.internalName && (predefAutoUnboxMethods.get(insn.name) match { + case Some(tp) => insn.desc == tp.descriptor + case _ => false + }) + } + + def isRefCreate(insn: MethodInsnNode): Boolean = calleeInMap(insn, srRefCreateMethods) + def isRefZero(insn: 
MethodInsnNode): Boolean = calleeInMap(insn, srRefZeroMethods) + + def runtimeRefClassBoxedType(refClass: InternalName): Type = Type.getArgumentTypes(srRefCreateMethods(refClass).methodType.descriptor)(0) + + def isSideEffectFreeCall(insn: MethodInsnNode): Boolean = { + isScalaBox(insn) || isScalaUnbox(insn) || + isJavaBox(insn) || // not java unbox, it may NPE + isSideEffectFreeConstructorCall(insn) + } + + def isNonNullMethodInvocation(mi: MethodInsnNode): Boolean = { + isJavaBox(mi) || isScalaBox(mi) || isPredefAutoBox(mi) || isRefCreate(mi) || isRefZero(mi) + } + + def isModuleLoad(insn: AbstractInsnNode, moduleName: InternalName): Boolean = insn match { + case fi: FieldInsnNode => fi.getOpcode == GETSTATIC && fi.owner == moduleName && fi.name == "MODULE$" && fi.desc == ("L" + moduleName + ";") + case _ => false + } + + def isPredefLoad(insn: AbstractInsnNode) = isModuleLoad(insn, PredefRef.internalName) + + def isPrimitiveBoxConstructor(insn: MethodInsnNode): Boolean = calleeInMap(insn, primitiveBoxConstructors) + def isRuntimeRefConstructor(insn: MethodInsnNode): Boolean = calleeInMap(insn, srRefConstructors) + def isTupleConstructor(insn: MethodInsnNode): Boolean = calleeInMap(insn, tupleClassConstructors) + + // unused objects created by these constructors are eliminated by pushPop + private lazy val sideEffectFreeConstructors: Set[(String, String)] = { + val ownerDesc = (p: (InternalName, MethodNameAndType)) => (p._1, p._2.methodType.descriptor) + primitiveBoxConstructors.map(ownerDesc).toSet ++ + srRefConstructors.map(ownerDesc) ++ + tupleClassConstructors.map(ownerDesc) ++ Set( + (ObjectRef.internalName, MethodBType(Nil, UNIT).descriptor), + (StringRef.internalName, MethodBType(Nil, UNIT).descriptor), + (StringRef.internalName, MethodBType(List(StringRef), UNIT).descriptor), + (StringRef.internalName, MethodBType(List(ArrayBType(CHAR)), UNIT).descriptor)) + } + + def isSideEffectFreeConstructorCall(insn: MethodInsnNode): Boolean = { + insn.name == 
INSTANCE_CONSTRUCTOR_NAME && sideEffectFreeConstructors((insn.owner, insn.desc)) + } + + private lazy val classesOfSideEffectFreeConstructors = sideEffectFreeConstructors.map(_._1) + + def isNewForSideEffectFreeConstructor(insn: AbstractInsnNode) = { + insn.getOpcode == NEW && { + val ti = insn.asInstanceOf[TypeInsnNode] + classesOfSideEffectFreeConstructors.contains(ti.desc) + } + } + + def isBoxedUnit(insn: AbstractInsnNode) = { + insn.getOpcode == GETSTATIC && { + val fi = insn.asInstanceOf[FieldInsnNode] + fi.owner == srBoxedUnitRef.internalName && fi.name == "UNIT" && fi.desc == srBoxedUnitRef.descriptor + } + } + + def isBuiltinFunctionType(internalName: InternalName): Boolean = functionRefs(internalName) + + /** + * Visit the class node and collect all referenced nested classes. + */ + def collectNestedClasses(classNode: ClassNode): List[ClassBType] = { + val innerClasses = mutable.Set.empty[ClassBType] + + def visitInternalName(internalName: InternalName): Unit = if (internalName != null) { + val t = classBTypeFromParsedClassfile(internalName) + if (t.isNestedClass.get) innerClasses += t + } + + // either an internal/Name or [[Linternal/Name; -- there are certain references in classfiles + // that are either an internal name (without the surrounding `L;`) or an array descriptor + // `[Linternal/Name;`. 
+ def visitInternalNameOrArrayReference(ref: String): Unit = if (ref != null) { + val bracket = ref.lastIndexOf('[') + if (bracket == -1) visitInternalName(ref) + else if (ref.charAt(bracket + 1) == 'L') visitInternalName(ref.substring(bracket + 2, ref.length - 1)) + } + + // we are only interested in the class references in the descriptor, so we can skip over + // primitives and the brackets of array descriptors + def visitDescriptor(desc: String): Unit = (desc.charAt(0): @switch) match { + case '(' => + val internalNames = mutable.ListBuffer.empty[String] + var i = 1 + while (i < desc.length) { + if (desc.charAt(i) == 'L') { + val start = i + 1 // skip the L + while (desc.charAt(i) != ';') i += 1 + internalNames append desc.substring(start, i) + } + // skips over '[', ')', primitives + i += 1 + } + internalNames foreach visitInternalName + + case 'L' => + visitInternalName(desc.substring(1, desc.length - 1)) + + case '[' => + visitInternalNameOrArrayReference(desc) + + case _ => // skip over primitive types + } + + def visitConstant(const: AnyRef): Unit = const match { + case t: Type => visitDescriptor(t.getDescriptor) + case _ => + } + + // in principle we could skip references to annotation types, as they only end up as strings in the + // constant pool, not as class references. however, the java compiler still includes nested + // annotation classes in the innerClass table, so we do the same. explained in detail in the + // large comment in class BTypes. 
+ def visitAnnotation(annot: AnnotationNode): Unit = { + visitDescriptor(annot.desc) + if (annot.values != null) annot.values.asScala foreach visitConstant + } + + def visitAnnotations(annots: java.util.List[_ <: AnnotationNode]) = if (annots != null) annots.asScala foreach visitAnnotation + def visitAnnotationss(annotss: Array[java.util.List[AnnotationNode]]) = if (annotss != null) annotss foreach visitAnnotations + + def visitHandle(handle: Handle): Unit = { + visitInternalNameOrArrayReference(handle.getOwner) + visitDescriptor(handle.getDesc) + } + + visitInternalName(classNode.name) + innerClasses ++= classBTypeFromParsedClassfile(classNode.name).info.get.nestedClasses + + visitInternalName(classNode.superName) + classNode.interfaces.asScala foreach visitInternalName + visitInternalName(classNode.outerClass) + + visitAnnotations(classNode.visibleAnnotations) + visitAnnotations(classNode.visibleTypeAnnotations) + visitAnnotations(classNode.invisibleAnnotations) + visitAnnotations(classNode.invisibleTypeAnnotations) + + for (f <- classNode.fields.asScala) { + visitDescriptor(f.desc) + visitAnnotations(f.visibleAnnotations) + visitAnnotations(f.visibleTypeAnnotations) + visitAnnotations(f.invisibleAnnotations) + visitAnnotations(f.invisibleTypeAnnotations) + } + + for (m <- classNode.methods.asScala) { + visitDescriptor(m.desc) + + visitAnnotations(m.visibleAnnotations) + visitAnnotations(m.visibleTypeAnnotations) + visitAnnotations(m.invisibleAnnotations) + visitAnnotations(m.invisibleTypeAnnotations) + visitAnnotationss(m.visibleParameterAnnotations) + visitAnnotationss(m.invisibleParameterAnnotations) + visitAnnotations(m.visibleLocalVariableAnnotations) + visitAnnotations(m.invisibleLocalVariableAnnotations) + + m.exceptions.asScala foreach visitInternalName + for (tcb <- m.tryCatchBlocks.asScala) visitInternalName(tcb.`type`) + + val iter = m.instructions.iterator() + while (iter.hasNext) iter.next() match { + case ti: TypeInsnNode => 
visitInternalNameOrArrayReference(ti.desc) + case fi: FieldInsnNode => visitInternalNameOrArrayReference(fi.owner); visitDescriptor(fi.desc) + case mi: MethodInsnNode => visitInternalNameOrArrayReference(mi.owner); visitDescriptor(mi.desc) + case id: InvokeDynamicInsnNode => visitDescriptor(id.desc); visitHandle(id.bsm); id.bsmArgs foreach visitConstant + case ci: LdcInsnNode => visitConstant(ci.cst) + case ma: MultiANewArrayInsnNode => visitDescriptor(ma.desc) + case _ => + } + } + innerClasses.toList + } + + /** + * In order to run an Analyzer, the maxLocals / maxStack fields need to be available. The ASM + * framework only computes these values during bytecode generation. + * + * NOTE 1: as explained in the `analysis` package object, the maxStack value used by the Analyzer + * may be smaller than the correct maxStack value in the classfile (Analyzers only use a single + * slot for long / double values). The maxStack computed here are correct for running an analyzer, + * but not for writing in the classfile. We let the ClassWriter recompute max's. + * + * NOTE 2: the maxStack value computed here may be larger than the smallest correct value + * that would allow running an analyzer, see `InstructionStackEffect.forAsmAnalysisConservative`. + * + * NOTE 3: the implementation doesn't look at instructions that cannot be reached, it computes + * the max local / stack size in the reachable code. These max's work just fine for running an + * Analyzer: its implementation also skips over unreachable code in the same way. 
+ */ + def computeMaxLocalsMaxStack(method: MethodNode): Unit = { + if (isAbstractMethod(method) || isNativeMethod(method)) { + method.maxLocals = 0 + method.maxStack = 0 + } else if (!maxLocalsMaxStackComputed(method)) { + val size = method.instructions.size + + var maxLocals = parametersSize(method) + var maxStack = 0 + + // queue of instruction indices where analysis should start + var queue = new Array[Int](8) + var top = -1 + def enq(i: Int): Unit = { + if (top == queue.length - 1) { + val nq = new Array[Int](queue.length * 2) + Array.copy(queue, 0, nq, 0, queue.length) + queue = nq + } + top += 1 + queue(top) = i + } + def deq(): Int = { + val r = queue(top) + top -= 1 + r + } + + val subroutineRetTargets = new mutable.Stack[AbstractInsnNode] + + // for each instruction in the queue, contains the stack height at this instruction. + // once an instruction has been treated, contains -1 to prevent re-enqueuing + val stackHeights = new Array[Int](size) + + def enqInsn(insn: AbstractInsnNode, height: Int): Unit = { + enqInsnIndex(method.instructions.indexOf(insn), height) + } + + def enqInsnIndex(insnIndex: Int, height: Int): Unit = { + if (insnIndex < size && stackHeights(insnIndex) != -1) { + stackHeights(insnIndex) = height + enq(insnIndex) + } + } + + val tcbIt = method.tryCatchBlocks.iterator() + while (tcbIt.hasNext) { + val tcb = tcbIt.next() + enqInsn(tcb.handler, 1) + if (maxStack == 0) maxStack = 1 + } + + enq(0) + while (top != -1) { + val insnIndex = deq() + val insn = method.instructions.get(insnIndex) + val initHeight = stackHeights(insnIndex) + stackHeights(insnIndex) = -1 // prevent i from being enqueued again + + if (insn.getOpcode == -1) { // frames, labels, line numbers + enqInsnIndex(insnIndex + 1, initHeight) + } else { + val stackGrowth = InstructionStackEffect.maxStackGrowth(insn) + val heightAfter = initHeight + stackGrowth + if (heightAfter > maxStack) maxStack = heightAfter + + // update maxLocals + insn match { + case v: VarInsnNode => + 
val longSize = if (isSize2LoadOrStore(v.getOpcode)) 1 else 0 + maxLocals = math.max(maxLocals, v.`var` + longSize + 1) // + 1 because local numbers are 0-based + + case i: IincInsnNode => + maxLocals = math.max(maxLocals, i.`var` + 1) + + case _ => + } + + insn match { + case j: JumpInsnNode => + if (j.getOpcode == JSR) { + val jsrTargetHeight = heightAfter + 1 + if (jsrTargetHeight > maxStack) maxStack = jsrTargetHeight + subroutineRetTargets.push(j.getNext) + enqInsn(j.label, jsrTargetHeight) + } else { + enqInsn(j.label, heightAfter) + val opc = j.getOpcode + if (opc != GOTO) enqInsnIndex(insnIndex + 1, heightAfter) // jump is conditional, so the successor is also a possible control flow target + } + + case l: LookupSwitchInsnNode => + var j = 0 + while (j < l.labels.size) { + enqInsn(l.labels.get(j), heightAfter); j += 1 + } + enqInsn(l.dflt, heightAfter) + + case t: TableSwitchInsnNode => + var j = 0 + while (j < t.labels.size) { + enqInsn(t.labels.get(j), heightAfter); j += 1 + } + enqInsn(t.dflt, heightAfter) + + case r: VarInsnNode if r.getOpcode == RET => + enqInsn(subroutineRetTargets.pop(), heightAfter) + + case _ => + val opc = insn.getOpcode + if (opc != ATHROW && !isReturn(insn)) + enqInsnIndex(insnIndex + 1, heightAfter) + } + } + } + + method.maxLocals = maxLocals + method.maxStack = maxStack + + maxLocalsMaxStackComputed += method + } + } +} diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/InstructionStackEffect.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/InstructionStackEffect.scala index 8d8ea839e6..dd19ad594f 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/InstructionStackEffect.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/InstructionStackEffect.scala @@ -5,35 +5,74 @@ package analysis import scala.annotation.switch import scala.tools.asm.Opcodes._ import scala.tools.asm.Type -import scala.tools.asm.tree.{MultiANewArrayInsnNode, InvokeDynamicInsnNode, MethodInsnNode, AbstractInsnNode} 
+import scala.tools.asm.tree._ import scala.tools.asm.tree.analysis.{Frame, Value} import opt.BytecodeUtils._ -import collection.immutable object InstructionStackEffect { - private var cache: immutable.IntMap[(Int, Int)] = immutable.IntMap.empty - private def t(x: Int, y: Int): (Int, Int) = { - // x can go up to 255 (number of parameters of a method, dimensions in multianewarray) we cache - // x up to 10, which covers most cases and limits the cache. y doesn't go above 6 (see cases). - if (x > 10 || y > 6) (x, y) - else { - val key = (x << 8) + y // this would work for any x < 256 - if (cache contains key) { - cache(key) - } else { - val r = (x, y) - cache += key -> r - r - } - } + val consShift = 3 + val prodMask = (1 << consShift) - 1 + + def cons(i: Int) = i >>> consShift + def prod(i: Int) = i & prodMask + + private def t(x: Int, y: Int): Int = (x << consShift) | y + + /** + * Returns the number of stack values consumed and produced by `insn`, encoded in a single `Int` + * (the `cons` / `prod` extract individual values). The returned values are correct for use in + * asm's Analyzer framework. For example, a LLOAD instruction produces one stack value. See also + * doc in `analysis` package object. + * + * This method requires the `frame` to be in the state **before** executing / interpreting the + * `insn`. + */ + def forAsmAnalysis[V <: Value](insn: AbstractInsnNode, frame: Frame[V]): Int = computeConsProd(insn, forClassfile = false, conservative = false, frame = frame) + + /** + * Returns the maximal possible growth of the stack when executing `insn`. The returned value + * is usually the same as expected by asm's Analyzer framework, but it may be larger. 
For + * example, consider a POP2 instruction: + * - if two size-1 values are popped, then the asm Analyzer consumes two values + * - if a size-2 value is popped, the asm Analyzer consumes only one stack slot (see doc in the + * `analysis` package object) + * + * If a precise result is needed, invoke the `forAsmAnalysis` and provide a `frame` value that + * allows looking up the sizes of values on the stack. + */ + def maxStackGrowth(insn: AbstractInsnNode): Int = { + val prodCons = computeConsProd(insn, forClassfile = false, conservative = true) + prod(prodCons) - cons(prodCons) } /** - * Returns a pair with the number of stack values consumed and produced by `insn`. - * This method requires the `frame` to be in the state **before** executing / interpreting - * the `insn`. + * Returns the number of stack values consumed and produced by `insn`, encoded in a single `Int` + * (the `cons` / `prod` extract individual values). The returned values are correct for writing + * into a classfile (see doc on the `analysis` package object). 
*/ - def apply[V <: Value](insn: AbstractInsnNode, frame: Frame[V]): (Int, Int) = { + def forClassfile(insn: AbstractInsnNode): Int = computeConsProd(insn, forClassfile = true, conservative = false) + + private def invokeConsProd(methodDesc: String, insn: AbstractInsnNode, forClassfile: Boolean): Int = { + val consumesReceiver = insn.getOpcode != INVOKESTATIC && insn.getOpcode != INVOKEDYNAMIC + if (forClassfile) { + val sizes = Type.getArgumentsAndReturnSizes(methodDesc) + val cons = (sizes >> 2) - (if (consumesReceiver) 0 else 1) + val prod = sizes & 0x03 + t(cons, prod) + } else { + val cons = Type.getArgumentTypes(methodDesc).length + (if (consumesReceiver) 1 else 0) + val prod = if (Type.getReturnType(methodDesc) == Type.VOID_TYPE) 0 else 1 + t(cons, prod) + } + } + + private def fieldInsnIsLongOrDouble(insn: AbstractInsnNode) = { + val d = insn.asInstanceOf[FieldInsnNode].desc + d == "J" || d == "D" + } + + private def computeConsProd[V <: Value](insn: AbstractInsnNode, forClassfile: Boolean, conservative: Boolean, frame: Frame[V] = null): Int = { + // not used if `forClassfile || conservative`: in these cases, `frame` is allowed to be `null` def peekStack(n: Int): V = frame.peekStack(n) (insn.getOpcode: @switch) match { @@ -48,142 +87,176 @@ object InstructionStackEffect { ICONST_3 | ICONST_4 | ICONST_5 | - LCONST_0 | - LCONST_1 | FCONST_0 | FCONST_1 | FCONST_2 | - DCONST_0 | - DCONST_1 | BIPUSH | SIPUSH | - LDC | ILOAD | - LLOAD | FLOAD | - DLOAD | ALOAD => t(0, 1) + case LDC => + if (forClassfile) insn.asInstanceOf[LdcInsnNode].cst match { + case _: java.lang.Long | _: java.lang.Double => t(0, 2) + case _ => t(0, 1) + } else + t(0, 1) + + case LCONST_0 | + LCONST_1 | + DCONST_0 | + DCONST_1 | + LLOAD | + DLOAD => if (forClassfile) t(0, 2) else t(0, 1) + case IALOAD | - LALOAD | FALOAD | - DALOAD | AALOAD | BALOAD | CALOAD | SALOAD => t(2, 1) + case LALOAD | + DALOAD => if (forClassfile) t(2, 2) else t(2, 1) + case ISTORE | - LSTORE | FSTORE | - DSTORE | 
ASTORE => t(1, 0) + case LSTORE | + DSTORE => if (forClassfile) t(2, 0) else t(1, 0) + case IASTORE | - LASTORE | FASTORE | - DASTORE | AASTORE | BASTORE | CASTORE | SASTORE => t(3, 0) + case LASTORE | + DASTORE => if (forClassfile) t(4, 0) else t(3, 0) + case POP => t(1, 0) case POP2 => - val isSize2 = peekStack(0).getSize == 2 - if (isSize2) t(1, 0) else t(2, 0) + if (forClassfile) t(2, 0) + else if (conservative) t(1, 0) + else { + val isSize2 = peekStack(0).getSize == 2 + if (isSize2) t(1, 0) else t(2, 0) + } case DUP => t(1, 2) case DUP_X1 => t(2, 3) case DUP_X2 => - val isSize2 = peekStack(1).getSize == 2 - if (isSize2) t(2, 3) else t(3, 4) + if (forClassfile || conservative) t(3, 4) + else { + val isSize2 = peekStack(1).getSize == 2 + if (isSize2) t(2, 3) else t(3, 4) + } case DUP2 => - val isSize2 = peekStack(0).getSize == 2 - if (isSize2) t(1, 2) else t(2, 4) + if (forClassfile || conservative) t(2, 4) + else { + val isSize2 = peekStack(0).getSize == 2 + if (isSize2) t(1, 2) else t(2, 4) + } case DUP2_X1 => - val isSize2 = peekStack(0).getSize == 2 - if (isSize2) t(2, 3) else t(3, 4) + if (forClassfile || conservative) t(3, 5) + else { + val isSize2 = peekStack(0).getSize == 2 + if (isSize2) t(2, 3) else t(3, 5) + } case DUP2_X2 => - val v1isSize2 = peekStack(0).getSize == 2 - if (v1isSize2) { - val v2isSize2 = peekStack(1).getSize == 2 - if (v2isSize2) t(2, 3) else t(3, 4) - } else { - val v3isSize2 = peekStack(2).getSize == 2 - if (v3isSize2) t(3, 5) else t(4, 6) + if (forClassfile || conservative) t(4, 6) + else { + val v1isSize2 = peekStack(0).getSize == 2 + if (v1isSize2) { + val v2isSize2 = peekStack(1).getSize == 2 + if (v2isSize2) t(2, 3) else t(3, 4) + } else { + val v3isSize2 = peekStack(2).getSize == 2 + if (v3isSize2) t(3, 5) else t(4, 6) + } } case SWAP => t(2, 2) case IADD | - LADD | FADD | - DADD | ISUB | - LSUB | FSUB | - DSUB | IMUL | - LMUL | FMUL | - DMUL | IDIV | - LDIV | FDIV | - DDIV | IREM | + FREM => t(2, 1) + + case LADD | + DADD | 
+ LSUB | + DSUB | + LMUL | + DMUL | + LDIV | + DDIV | LREM | - FREM | - DREM => t(2, 1) + DREM => if (forClassfile) t(4, 2) else t(2, 1) case INEG | - LNEG | - FNEG | - DNEG => t(1, 1) + FNEG => t(1, 1) + + case LNEG | + DNEG => if (forClassfile) t(2, 2) else t(1, 1) case ISHL | - LSHL | ISHR | - LSHR | IUSHR | - LUSHR | IAND | - LAND | IOR | + IXOR => t(2, 1) + + case LSHL | + LSHR | + LUSHR => if (forClassfile) t(3, 2) else t(2, 1) + + case LAND | LOR | - IXOR | - LXOR => t(2, 1) + LXOR => if (forClassfile) t(4, 2) else t(2, 1) case IINC => t(0, 0) - case I2L | - I2F | - I2D | - L2I | - L2F | - L2D | + case I2F | F2I | - F2L | - F2D | - D2I | - D2L | - D2F | I2B | I2C | I2S => t(1, 1) + case I2L | + I2D | + F2L | + F2D => if (forClassfile) t(1, 2) else t(1, 1) + + case L2I | + L2F | + D2I | + D2F => if (forClassfile) t(2, 1) else t(1, 1) + + case L2D | + D2L => if (forClassfile) t(2, 2) else t(1, 1) + + case FCMPL | + FCMPG => t(2, 1) + case LCMP | - FCMPL | - FCMPG | DCMPL | - DCMPG => t(2, 1) + DCMPG => if (forClassfile) t(4, 1) else t(2, 1) case IFEQ | IFNE | @@ -211,35 +284,36 @@ object InstructionStackEffect { LOOKUPSWITCH => t(1, 0) case IRETURN | - LRETURN | FRETURN | - DRETURN | ARETURN => t(1, 0) // Frame.execute consumes one stack value + case LRETURN | + DRETURN => if (forClassfile) t(2, 0) else t(1, 0) + case RETURN => t(0, 0) // Frame.execute does not change the stack - case GETSTATIC => t(0, 1) + case GETSTATIC => + val prod = if (forClassfile && fieldInsnIsLongOrDouble(insn)) 2 else 1 + t(0, prod) - case PUTSTATIC => t(1, 0) + case PUTSTATIC => + val cons = if (forClassfile && fieldInsnIsLongOrDouble(insn)) 2 else 1 + t(cons, 0) - case GETFIELD => t(1, 1) + case GETFIELD => + val prod = if (forClassfile && fieldInsnIsLongOrDouble(insn)) 2 else 1 + t(1, prod) - case PUTFIELD => t(2, 0) + case PUTFIELD => + val cons = if (forClassfile && fieldInsnIsLongOrDouble(insn)) 3 else 2 + t(cons, 0) case INVOKEVIRTUAL | INVOKESPECIAL | INVOKESTATIC | - 
INVOKEINTERFACE => - val desc = insn.asInstanceOf[MethodInsnNode].desc - val cons = Type.getArgumentTypes(desc).length + (if (insn.getOpcode == INVOKESTATIC) 0 else 1) - val prod = if (Type.getReturnType(desc) == Type.VOID_TYPE) 0 else 1 - t(cons, prod) - - case INVOKEDYNAMIC => - val desc = insn.asInstanceOf[InvokeDynamicInsnNode].desc - val cons = Type.getArgumentTypes(desc).length - val prod = if (Type.getReturnType(desc) == Type.VOID_TYPE) 0 else 1 - t(cons, prod) + INVOKEINTERFACE => invokeConsProd(insn.asInstanceOf[MethodInsnNode].desc, insn, forClassfile) + + case INVOKEDYNAMIC => invokeConsProd(insn.asInstanceOf[InvokeDynamicInsnNode].desc, insn, forClassfile) case NEW => t(0, 1) @@ -261,5 +335,4 @@ object InstructionStackEffect { IFNONNULL => t(1, 0) } } - } diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzer.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzer.scala index 31b62f747e..30e73f8ac2 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzer.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzer.scala @@ -7,66 +7,12 @@ import java.util import scala.annotation.switch import scala.tools.asm.{Type, Opcodes} import scala.tools.asm.tree.{MethodInsnNode, LdcInsnNode, AbstractInsnNode} -import scala.tools.asm.tree.analysis.{Frame, Analyzer, Interpreter, Value} +import scala.tools.asm.tree.analysis._ import scala.tools.nsc.backend.jvm.opt.BytecodeUtils import BytecodeUtils._ /** - * Some notes on the ASM analyzer framework. - * - * Value - * - Abstract, needs to be implemented for each analysis. - * - Represents the desired information about local variables and stack values, for example: - * - Is this value known to be null / not null? - * - What are the instructions that could potentially have produced this value? - * - * Interpreter - * - Abstract, needs to be implemented for each analysis. 
Sometimes one can subclass an existing - * interpreter, e.g., SourceInterpreter or BasicInterpreter. - * - Multiple abstract methods that receive an instruction and the instruction's input values, and - * return a value representing the result of that instruction. - * - Note: due to control flow, the interpreter can be invoked multiple times for the same - * instruction, until reaching a fixed point. - * - Abstract `merge` function that computes the least upper bound of two values. Used by - * Frame.merge (see below). - * - * Frame - * - Can be used directly for many analyses, no subclass required. - * - Every frame has an array of values: one for each local variable and for each stack slot. - * - A `top` index stores the index of the current stack top - * - NOTE: for a size-2 local variable at index i, the local variable at i+1 is set to an empty - * value. However, for a size-2 value at index i on the stack, the value at i+1 holds the next - * stack value. - * - Defines the `execute(instruction)` method. - * - executing mutates the state of the frame according to the effect of the instruction - * - pop consumed values from the stack - * - pass them to the interpreter together with the instruction - * - if applicable, push the resulting value on the stack - * - Defines the `merge(otherFrame)` method - * - called by the analyzer when multiple control flow paths lead to an instruction - * - the frame at the branching instruction is merged into the current frame of the - * instruction (held by the analyzer) - * - mutates the values of the current frame, merges all values using interpreter.merge. - * - * Analyzer - * - Stores a frame for each instruction - * - `merge` function takes an instruction and a frame, merges the existing frame for that instr - * (from the frames array) with the new frame passed as argument. - * if the frame changed, puts the instruction on the work queue (fixpiont). 
- * - initial frame: initialized for first instr by calling interpreter.new[...]Value - * for each slot (locals and params), stored in frames[firstInstr] by calling `merge` - * - work queue of instructions (`queue` array, `top` index for next instruction to analyze) - * - analyze(method): simulate control flow. while work queue non-empty: - * - copy the state of `frames[instr]` into a local frame `current` - * - call `current.execute(instr, interpreter)`, mutating the `current` frame - * - if it's a branching instruction - * - for all potential destination instructions - * - merge the destination instruction frame with the `current` frame - * (this enqueues the destination instr if its frame changed) - * - invoke `newControlFlowEdge` (see below) - * - the analyzer also tracks active exception handlers at each instruction - * - the empty method `newControlFlowEdge` can be overridden to track control flow if required - * + * See the package object `analysis` for details on the ASM analysis framework. * * Some notes on nullness analysis. * @@ -87,59 +33,37 @@ import BytecodeUtils._ */ /** - * Type to represent nullness of values. - */ -sealed trait Nullness { - final def merge(other: Nullness) = if (this == other) this else Unknown -} -case object NotNull extends Nullness -case object Unknown extends Nullness -case object Null extends Nullness - -/** * Represents the nullness state for a local variable or stack value. * - * Note that nullness of primitive values is not tracked, it will be always [[Unknown]]. + * Note that nullness of primitive values is not tracked, it will be always unknown. */ -sealed trait NullnessValue extends Value { - /** - * The nullness of this value. - */ - def nullness: Nullness - - /** - * True if this value is a long or double. The Analyzer framework needs to know - * the size of each value when interpreting instructions, see `Frame.execute`. 
- */ - def isSize2: Boolean +sealed abstract class NullnessValue(final val isSize2: Boolean) extends Value { /** * The size of the slot described by this value. Cannot be 0 because no values are allocated * for void-typed slots, see NullnessInterpreter.newValue. **/ def getSize: Int = if (isSize2) 2 else 1 - def merge(other: NullnessValue) = NullnessValue(nullness merge other.nullness, isSize2) + def merge(other: NullnessValue) = { + if (this eq other) this + else if (this eq UnknownValue2) this // the only possible value of size two + else UnknownValue1 + } + + final override def equals(other: Any) = this eq other.asInstanceOf[Object] } -object NullValue extends NullnessValue { def nullness = Null; def isSize2 = false; override def toString = "Null" } -object UnknownValue1 extends NullnessValue { def nullness = Unknown; def isSize2 = false; override def toString = "Unknown1" } -object UnknownValue2 extends NullnessValue { def nullness = Unknown; def isSize2 = true; override def toString = "Unknown2" } -object NotNullValue extends NullnessValue { def nullness = NotNull; def isSize2 = false; override def toString = "NotNull" } +object NullValue extends NullnessValue(isSize2 = false) { override def toString = "Null" } +object UnknownValue1 extends NullnessValue(isSize2 = false) { override def toString = "Unknown1" } +object UnknownValue2 extends NullnessValue(isSize2 = true ) { override def toString = "Unknown2" } +object NotNullValue extends NullnessValue(isSize2 = false) { override def toString = "NotNull" } object NullnessValue { - def apply(nullness: Nullness, isSize2: Boolean): NullnessValue = { - if (nullness == Null) NullValue - else if (nullness == NotNull) NotNullValue - else if (isSize2) UnknownValue2 - else UnknownValue1 - } - - def apply(nullness: Nullness, insn: AbstractInsnNode): NullnessValue = { - apply(nullness, isSize2 = BytecodeUtils.instructionResultSize(insn) == 2) - } + def unknown(isSize2: Boolean) = if (isSize2) UnknownValue2 else 
UnknownValue1 + def unknown(insn: AbstractInsnNode) = if (BytecodeUtils.instructionResultSize(insn) == 2) UnknownValue2 else UnknownValue1 } -final class NullnessInterpreter extends Interpreter[NullnessValue](Opcodes.ASM5) { +final class NullnessInterpreter(bTypes: BTypes) extends Interpreter[NullnessValue](Opcodes.ASM5) { def newValue(tp: Type): NullnessValue = { // ASM loves giving semantics to null. The behavior here is the same as in SourceInterpreter, // which is provided by the framework. @@ -151,29 +75,25 @@ final class NullnessInterpreter extends Interpreter[NullnessValue](Opcodes.ASM5) // (2) `tp` may also be `null`. When creating the initial frame, the analyzer invokes // `newValue(null)` for each local variable. We have to return a value of size 1. if (tp == Type.VOID_TYPE) null // (1) - else NullnessValue(Unknown, isSize2 = tp != null /*(2)*/ && tp.getSize == 2 ) + else NullnessValue.unknown(isSize2 = tp != null /*(2)*/ && tp.getSize == 2 ) } override def newParameterValue(isInstanceMethod: Boolean, local: Int, tp: Type): NullnessValue = { // For instance methods, the `this` parameter is known to be not null. - if (isInstanceMethod && local == 0) NullnessValue(NotNull, isSize2 = false) + if (isInstanceMethod && local == 0) NotNullValue else super.newParameterValue(isInstanceMethod, local, tp) } - def newOperation(insn: AbstractInsnNode): NullnessValue = { - val nullness = (insn.getOpcode: @switch) match { - case Opcodes.ACONST_NULL => Null + def newOperation(insn: AbstractInsnNode): NullnessValue = (insn.getOpcode: @switch) match { + case Opcodes.ACONST_NULL => NullValue - case Opcodes.LDC => insn.asInstanceOf[LdcInsnNode].cst match { - case _: String | _: Type => NotNull - case _ => Unknown - } - - case _ => Unknown + case Opcodes.LDC => insn.asInstanceOf[LdcInsnNode].cst match { + case _: String | _: Type => NotNullValue + case _ => NullnessValue.unknown(insn) } // for Opcodes.NEW, we use Unknown. 
The value will become NotNull after the constructor call. - NullnessValue(nullness, insn) + case _ => NullnessValue.unknown(insn) } def copyOperation(insn: AbstractInsnNode, value: NullnessValue): NullnessValue = value @@ -182,26 +102,24 @@ final class NullnessInterpreter extends Interpreter[NullnessValue](Opcodes.ASM5) case Opcodes.CHECKCAST => value case Opcodes.NEWARRAY | - Opcodes.ANEWARRAY => NullnessValue(NotNull, isSize2 = false) + Opcodes.ANEWARRAY => NotNullValue - case _ => NullnessValue(Unknown, insn) + case _ => NullnessValue.unknown(insn) } def binaryOperation(insn: AbstractInsnNode, value1: NullnessValue, value2: NullnessValue): NullnessValue = { - NullnessValue(Unknown, insn) + NullnessValue.unknown(insn) } - def ternaryOperation(insn: AbstractInsnNode, value1: NullnessValue, value2: NullnessValue, value3: NullnessValue): NullnessValue = { - NullnessValue(Unknown, isSize2 = false) - } + def ternaryOperation(insn: AbstractInsnNode, value1: NullnessValue, value2: NullnessValue, value3: NullnessValue): NullnessValue = UnknownValue1 - def naryOperation(insn: AbstractInsnNode, values: util.List[_ <: NullnessValue]): NullnessValue = (insn.getOpcode: @switch) match { - case Opcodes.MULTIANEWARRAY => - NullnessValue(NotNull, isSize2 = false) + def naryOperation(insn: AbstractInsnNode, values: util.List[_ <: NullnessValue]): NullnessValue = insn match { + case mi: MethodInsnNode if bTypes.backendUtils.isNonNullMethodInvocation(mi) => + NotNullValue case _ => - // TODO: use a list of methods that are known to return non-null values - NullnessValue(Unknown, insn) + if (insn.getOpcode == Opcodes.MULTIANEWARRAY) NotNullValue + else NullnessValue.unknown(insn) } def returnOperation(insn: AbstractInsnNode, value: NullnessValue, expected: NullnessValue): Unit = () @@ -219,8 +137,10 @@ class NullnessFrame(nLocals: Int, nStack: Int) extends AliasingFrame[NullnessVal override def execute(insn: AbstractInsnNode, interpreter: Interpreter[NullnessValue]): Unit = { import 
Opcodes._ - // get the object id of the object that is known to be not-null after this operation - val nullCheckedAliasId: Long = (insn.getOpcode: @switch) match { + // get the alias set of the object that is known to be not-null after this operation. + // alias sets are mutable / mutated, so after super.execute, this set contains the remaining + // aliases of the value that becomes not-null. + val nullCheckedAliases: AliasSet = (insn.getOpcode: @switch) match { case IALOAD | LALOAD | FALOAD | @@ -229,7 +149,7 @@ class NullnessFrame(nLocals: Int, nStack: Int) extends AliasingFrame[NullnessVal BALOAD | CALOAD | SALOAD => - aliasId(this.stackTop - 1) + aliasesOf(this.stackTop - 1) case IASTORE | FASTORE | @@ -239,35 +159,36 @@ class NullnessFrame(nLocals: Int, nStack: Int) extends AliasingFrame[NullnessVal SASTORE | LASTORE | DASTORE => - aliasId(this.stackTop - 2) + aliasesOf(this.stackTop - 2) case GETFIELD => - aliasId(this.stackTop) + aliasesOf(this.stackTop) case PUTFIELD => - aliasId(this.stackTop - 1) + aliasesOf(this.stackTop - 1) case INVOKEVIRTUAL | INVOKESPECIAL | INVOKEINTERFACE => val desc = insn.asInstanceOf[MethodInsnNode].desc val numArgs = Type.getArgumentTypes(desc).length - aliasId(this.stackTop - numArgs) + aliasesOf(this.stackTop - numArgs) case ARRAYLENGTH | MONITORENTER | MONITOREXIT => - aliasId(this.stackTop) + aliasesOf(this.stackTop) case _ => - -1 + null } super.execute(insn, interpreter) - if (nullCheckedAliasId != -1) { - for (i <- valuesWithAliasId(nullCheckedAliasId)) - this.setValue(i, NotNullValue) + if (nullCheckedAliases != null) { + val it = nullCheckedAliases.iterator + while (it.hasNext) + this.setValue(it.next(), NotNullValue) } } } @@ -276,7 +197,7 @@ class NullnessFrame(nLocals: Int, nStack: Int) extends AliasingFrame[NullnessVal * This class is required to override the `newFrame` methods, which makes sure the analyzer * uses NullnessFrames. 
*/ -class NullnessAnalyzer extends Analyzer[NullnessValue](new NullnessInterpreter) { +class NullnessAnalyzer(bTypes: BTypes) extends Analyzer[NullnessValue](new NullnessInterpreter(bTypes)) { override def newFrame(nLocals: Int, nStack: Int): NullnessFrame = new NullnessFrame(nLocals, nStack) override def newFrame(src: Frame[_ <: NullnessValue]): NullnessFrame = new NullnessFrame(src) } diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzer.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzerImpl.scala index 594fd8923c..8af4bd4d5d 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzer.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzerImpl.scala @@ -15,11 +15,10 @@ import scala.tools.asm.{Type, MethodVisitor} import scala.tools.asm.Opcodes._ import scala.tools.asm.tree._ import scala.tools.asm.tree.analysis._ -import scala.tools.nsc.backend.jvm.BTypes.InternalName import opt.BytecodeUtils._ -import scala.collection.convert.decorateAsScala._ +import scala.collection.JavaConverters._ /** * This class provides additional queries over ASM's built-in `SourceValue` analysis. @@ -55,24 +54,16 @@ import scala.collection.convert.decorateAsScala._ * * If ever needed, we could introduce a mode where primitive conversions (l2i) are considered as * copying operations. + * + * Note on performance: the data flow analysis (SourceValue / SourceInterpreter, provided by ASM) + * is roughly 2-3x slower than a simple analysis (like BasicValue). The reason is that the merge + * function (merging producer sets) is more complex than merging simple basic values. + * See also the doc comment in the package object `analysis`. 
*/ -class ProdConsAnalyzer(methodNode: MethodNode, classInternalName: InternalName) { - - /* Timers for benchmarking ProdCons - import scala.reflect.internal.util.Statistics._ - import ProdConsAnalyzer._ - val analyzerTimer = newSubTimer(classInternalName + "#" + methodNode.name + " - analysis", prodConsAnalyzerTimer) - val consumersTimer = newSubTimer(classInternalName + "#" + methodNode.name + " - consumers", prodConsAnalyzerTimer) - */ - - val analyzer = new Analyzer(new InitialProducerSourceInterpreter) +trait ProdConsAnalyzerImpl { + val methodNode: MethodNode -// val start = analyzerTimer.start() - analyzer.analyze(classInternalName, methodNode) -// analyzerTimer.stop(start) -// println(analyzerTimer.line) - - def frameAt(insn: AbstractInsnNode) = analyzer.frameAt(insn, methodNode) + def frameAt(insn: AbstractInsnNode): Frame[SourceValue] /** * Returns the potential producer instructions of a (local or stack) value in the frame of `insn`. @@ -102,8 +93,13 @@ class ProdConsAnalyzer(methodNode: MethodNode, classInternalName: InternalName) inputValues(insn).iterator.flatMap(v => v.insns.asScala).toSet } - def consumersOfOutputsFrom(insn: AbstractInsnNode): Set[AbstractInsnNode] = - _consumersOfOutputsFrom.get(insn).map(v => v.indices.flatMap(v.apply)(collection.breakOut): Set[AbstractInsnNode]).getOrElse(Set.empty) + def consumersOfOutputsFrom(insn: AbstractInsnNode): Set[AbstractInsnNode] = insn match { + case _: UninitializedLocalProducer => Set.empty + case ParameterProducer(local) => consumersOfValueAt(methodNode.instructions.getFirst, local) + case ExceptionProducer(handlerLabel, handlerFrame) => consumersOfValueAt(handlerLabel, handlerFrame.stackTop) + case _ => + _consumersOfOutputsFrom.get(insn).map(v => v.indices.flatMap(v.apply)(collection.breakOut): Set[AbstractInsnNode]).getOrElse(Set.empty) + } /** * Returns the potential initial producer instructions of a value in the frame of `insn`. 
@@ -159,13 +155,19 @@ class ProdConsAnalyzer(methodNode: MethodNode, classInternalName: InternalName) inputValueSlots(insn).flatMap(slot => initialProducersForValueAt(insn, slot)).toSet } - def ultimateConsumersOfOutputsFrom(insn: AbstractInsnNode): Set[AbstractInsnNode] = { - lazy val next = insn.getNext - outputValueSlots(insn).flatMap(slot => ultimateConsumersOfValueAt(next, slot)).toSet + def ultimateConsumersOfOutputsFrom(insn: AbstractInsnNode): Set[AbstractInsnNode] = insn match { + case _: UninitializedLocalProducer => Set.empty + case _ => + lazy val next = insn match { + case _: ParameterProducer => methodNode.instructions.getFirst + case ExceptionProducer(handlerLabel, _) => handlerLabel + case _ => insn.getNext + } + outputValueSlots(insn).flatMap(slot => ultimateConsumersOfValueAt(next, slot)).toSet } private def isCopyOperation(insn: AbstractInsnNode): Boolean = { - isVarInstruction(insn) || { + isLoadOrStore(insn) || { (insn.getOpcode: @switch) match { case DUP | DUP_X1 | DUP_X2 | DUP2 | DUP2_X1 | DUP2_X2 | SWAP | CHECKCAST => true case _ => false @@ -376,9 +378,9 @@ class ProdConsAnalyzer(methodNode: MethodNode, classInternalName: InternalName) Seq(insn.asInstanceOf[IincInsnNode].`var`) } else { val frame = frameAt(insn) - val stackEffect = InstructionStackEffect(insn, frame) + val prodCons = InstructionStackEffect.forAsmAnalysis(insn, frame) val stackSize = frame.getLocals + frame.getStackSize - (stackSize - stackEffect._1) until stackSize + (stackSize - InstructionStackEffect.cons(prodCons)) until stackSize } } @@ -386,7 +388,7 @@ class ProdConsAnalyzer(methodNode: MethodNode, classInternalName: InternalName) private def outputValueSlots(insn: AbstractInsnNode): Seq[Int] = insn match { case ParameterProducer(local) => Seq(local) case UninitializedLocalProducer(local) => Seq(local) - case ExceptionProducer(frame) => Seq(frame.stackTop) + case ExceptionProducer(_, frame) => Seq(frame.stackTop) case _ => if (insn.getOpcode == -1) return Seq.empty if 
(isStore(insn)) { @@ -395,16 +397,15 @@ class ProdConsAnalyzer(methodNode: MethodNode, classInternalName: InternalName) Seq(insn.asInstanceOf[IincInsnNode].`var`) } else { val frame = frameAt(insn) - val stackEffect = InstructionStackEffect(insn, frame) + val prodCons = InstructionStackEffect.forAsmAnalysis(insn, frame) val nextFrame = frameAt(insn.getNext) val stackSize = nextFrame.getLocals + nextFrame.getStackSize - (stackSize - stackEffect._2) until stackSize + (stackSize - InstructionStackEffect.prod(prodCons)) until stackSize } } /** For each instruction, a set of potential consumers of the produced values. */ private lazy val _consumersOfOutputsFrom: Map[AbstractInsnNode, Vector[Set[AbstractInsnNode]]] = { -// val start = consumersTimer.start() var res = Map.empty[AbstractInsnNode, Vector[Set[AbstractInsnNode]]] for { insn <- methodNode.instructions.iterator.asScala @@ -417,8 +418,6 @@ class ProdConsAnalyzer(methodNode: MethodNode, classInternalName: InternalName) val outputIndex = producedSlots.indexOf(i) res = res.updated(producer, currentConsumers.updated(outputIndex, currentConsumers(outputIndex) + insn)) } -// consumersTimer.stop(start) -// println(consumersTimer.line) res } @@ -426,11 +425,6 @@ class ProdConsAnalyzer(methodNode: MethodNode, classInternalName: InternalName) private val _ultimateConsumersCache: mutable.AnyRefMap[(AbstractInsnNode, Int), Set[AbstractInsnNode]] = mutable.AnyRefMap.empty } -object ProdConsAnalyzer { - import scala.reflect.internal.util.Statistics._ - val prodConsAnalyzerTimer = newTimer("Time in ProdConsAnalyzer", "jvm") -} - /** * A class for pseudo-instructions representing the initial producers of local values that have * no producer instruction in the method: @@ -446,10 +440,10 @@ object ProdConsAnalyzer { * return a; * } * - * In the first frame of the method, the SoruceValue for parameter `a` gives an empty set of + * In the first frame of the method, the SourceValue for parameter `a` gives an empty set of * producer 
instructions. * - * In the frame of the `IRETURN` instruction, the SoruceValue for parameter `a` lists a single + * In the frame of the `IRETURN` instruction, the SourceValue for parameter `a` lists a single * producer instruction: the `ISTORE 1`. This makes it look as if there was a single producer for * `a`, where in fact it might still hold the parameter's initial value. */ @@ -459,9 +453,9 @@ abstract class InitialProducer extends AbstractInsnNode(-1) { override def accept(cv: MethodVisitor): Unit = throw new UnsupportedOperationException } -case class ParameterProducer(local: Int) extends InitialProducer -case class UninitializedLocalProducer(local: Int) extends InitialProducer -case class ExceptionProducer(handlerFrame: Frame[_ <: Value]) extends InitialProducer +case class ParameterProducer(local: Int) extends InitialProducer +case class UninitializedLocalProducer(local: Int) extends InitialProducer +case class ExceptionProducer[V <: Value](handlerLabel: LabelNode, handlerFrame: Frame[V]) extends InitialProducer class InitialProducerSourceInterpreter extends SourceInterpreter { override def newParameterValue(isInstanceMethod: Boolean, local: Int, tp: Type): SourceValue = { @@ -473,6 +467,6 @@ class InitialProducerSourceInterpreter extends SourceInterpreter { } override def newExceptionValue(tryCatchBlockNode: TryCatchBlockNode, handlerFrame: Frame[_ <: Value], exceptionType: Type): SourceValue = { - new SourceValue(1, ExceptionProducer(handlerFrame)) + new SourceValue(1, ExceptionProducer(tryCatchBlockNode.handler, handlerFrame)) } } diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/TypeFlowInterpreter.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/TypeFlowInterpreter.scala new file mode 100644 index 0000000000..bcf9978c16 --- /dev/null +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/TypeFlowInterpreter.scala @@ -0,0 +1,36 @@ +package scala.tools.nsc +package backend.jvm +package analysis + +import scala.tools.asm.Type +import 
scala.tools.asm.tree.analysis.{BasicValue, BasicInterpreter} + +abstract class TypeFlowInterpreter extends BasicInterpreter { + override def newValue(tp: Type) = { + if (tp == null) super.newValue(tp) + else if (isRef(tp)) new BasicValue(tp) + else super.newValue(tp) + } + + def isRef(tp: Type) = tp != null && (tp.getSort match { + case Type.OBJECT | Type.ARRAY => true + case _ => false + }) + + def refLub(a: BasicValue, b: BasicValue): BasicValue + + override def merge(a: BasicValue, b: BasicValue): BasicValue = { + if (a == b) a + else if (isRef(a.getType) && isRef(b.getType)) refLub(a, b) + else BasicValue.UNINITIALIZED_VALUE + } +} + +/** + * A [[TypeFlowInterpreter]] which collapses LUBs of non-equal reference types to Object. + * This could be made more precise by looking up ClassBTypes for the two reference types and using + * the `jvmWiseLUB` method. + */ +class NonLubbingTypeFlowInterpreter extends TypeFlowInterpreter { + def refLub(a: BasicValue, b: BasicValue): BasicValue = BasicValue.REFERENCE_VALUE // java/lang/Object +} diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/package.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/package.scala new file mode 100644 index 0000000000..ef961941a0 --- /dev/null +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/package.scala @@ -0,0 +1,374 @@ +package scala.tools.nsc.backend.jvm + +/** + * Summary on the ASM analyzer framework + * -------------------------------------- + * + * Value + * - Abstract, needs to be implemented for each analysis. + * - Represents the desired information about local variables and stack values, for example: + * - Is this value known to be null / not null? + * - What are the instructions that could potentially have produced this value? + * + * Interpreter + * - Abstract, needs to be implemented for each analysis. Sometimes one can subclass an existing + * interpreter, e.g., SourceInterpreter or BasicInterpreter. 
+ * - Multiple abstract methods that receive an instruction and the instruction's input values, and + * return a value representing the result of that instruction. + * - Note: due to control flow, the interpreter can be invoked multiple times for the same + * instruction, until reaching a fixed point. + * - Abstract `merge` function that computes the least upper bound of two values. Used by + * Frame.merge (see below). + * + * Frame + * - Can be used directly for many analyses, no subclass required. + * - Every frame has an array of values: one for each local variable and for each stack slot. + * - A `top` index stores the index of the current stack top + * - NOTE: for a size-2 local variable at index i, the local variable at i+1 is set to an empty + * value. However, for a size-2 value at index i on the stack, the value at i+1 holds the next + * stack value. IMPORTANT: this is only the case in ASM's analysis framework, not in bytecode. + * See comment below. + * - Defines the `execute(instruction)` method. + * - executing mutates the state of the frame according to the effect of the instruction + * - pop consumed values from the stack + * - pass them to the interpreter together with the instruction + * - if applicable, push the resulting value on the stack + * - Defines the `merge(otherFrame)` method + * - called by the analyzer when multiple control flow paths lead to an instruction + * - the frame at the branching instruction is merged into the current frame of the + * instruction (held by the analyzer) + * - mutates the values of the current frame, merges all values using interpreter.merge. + * + * Analyzer + * - Stores a frame for each instruction + * - `merge` function takes an instruction and a frame, merges the existing frame for that instr + * (from the frames array) with the new frame passed as argument. + * if the frame changed, puts the instruction on the work queue (fixpoint). 
+ * - initial frame: initialized for first instr by calling interpreter.new[...]Value + * for each slot (locals and params), stored in frames[firstInstr] by calling `merge` + * - work queue of instructions (`queue` array, `top` index for next instruction to analyze) + * - analyze(method): simulate control flow. while work queue non-empty: + * - copy the state of `frames[instr]` into a local frame `current` + * - call `current.execute(instr, interpreter)`, mutating the `current` frame + * - if it's a branching instruction + * - for all potential destination instructions + * - merge the destination instruction frame with the `current` frame + * (this enqueues the destination instr if its frame changed) + * - invoke `newControlFlowEdge` (see below) + * - the analyzer also tracks active exception handlers at each instruction + * - the empty method `newControlFlowEdge` can be overridden to track control flow if required + * + * + * MaxLocals and MaxStack + * ---------------------- + * + * At the JVM level, long and double values occupy two slots, both as local variables and on the + * stack, as specified in the JVM spec 2.6.2: + * "At any point in time, an operand stack has an associated depth, where a value of type long or + * double contributes two units to the depth and a value of any other type contributes one unit." + * + * For example, a method + * class A { def f(a: Long, b: Long) = a + b } + * has MAXSTACK=4 in the classfile. This value is computed by the ClassWriter / MethodWriter when + * generating the classfile (we always pass COMPUTE_MAXS to the ClassWriter). 
+ * + * For running an ASM Analyzer, long and double values occupy two local variable slots, but only + * a single slot on the call stack, as shown by the following snippet: + * + * import scala.tools.nsc.backend.jvm._ + * import scala.tools.nsc.backend.jvm.opt.BytecodeUtils._ + * import scala.collection.convert.decorateAsScala._ + * import scala.tools.asm.tree.analysis._ + * + * val cn = AsmUtils.readClass("/Users/luc/scala/scala/sandbox/A.class") + * val m = cn.methods.iterator.asScala.find(_.name == "f").head + * + * // the value is read from the classfile, so it's 4 + * println(s"maxLocals: ${m.maxLocals}, maxStack: ${m.maxStack}") // maxLocals: 5, maxStack: 4 + * + * // we can safely set it to 2 for running the analyzer. + * m.maxStack = 2 + * + * val a = new Analyzer(new BasicInterpreter) + * a.analyze(cn.name, m) + * val addInsn = m.instructions.iterator.asScala.find(_.getOpcode == 97).get // LADD Opcode + * val addFrame = a.frameAt(addInsn, m) + * + * addFrame.getStackSize // 2: the two long values only take one slot each + * addFrame.getLocals // 5: this takes one slot, the two long parameters take 2 slots each + * + * + * While running the optimizer, we need to make sure that the `maxStack` value of a method is + * large enough for running an ASM analyzer. We don't need to worry if the value is incorrect in + * the JVM perspective: the value will be re-computed and overwritten in the ClassWriter. + * + * + * Lessons learnt while benchmarking the alias tracking analysis + * ------------------------------------------------------------- + * + * Profiling + * - Use YourKit for finding hotspots (cpu profiling). when it comes to drilling down into the details + * of a hotspot, don't pay too much attention to the percentages / time counts. + * - Should also try other profilers. + * - Use timers. When a method showed up as a hotspot, i added a timer around that method, and a + * second one within the method to measure specific parts. 
The timers slow things down, but the + * relative numbers show what parts of a method are slow. + * + * ASM analyzer insights + * - The time for running an analysis depends on the number of locals and the number of instructions. + * Reducing the number of locals helps speeding up the analysis: there are fewer values to + * merge when merging two frames. + * See also https://github.com/scala/scala-dev/issues/47 + * - The common hot spot of an ASM analysis is Frame.merge, for example in producers / consumers. + * - For nullness analysis the time is spent as follows + * - 20% merging nullness values. this is as expected: for example, the same absolute amount of + * time is spent in merging BasicValues when running a BasicInterpreter. + * - 50% merging alias sets. i tried to optimize what i could out of this. + * - 20% is spent creating new frames from existing ones, see comment on AliasingFrame.init. + * - The implementation of Frame.merge (the main hot spot) contains a megamorphic callsite to + * `interpreter.merge`. This can be observed easily by running a test program that either runs + * a BasicValue analysis only, versus a program that first runs a nullness analysis and then + * a BasicValue. In an example, the time for the BasicValue analysis goes from 519ms to 1963ms, + * a 3.8x slowdown. + * - I added counters to the Frame.merge methods for nullness and BasicValue analysis. In the + * examples I benchmarked, the number of merge invocations was always exactly the same. + * It would probably be possible to come up with an example where alias set merging forces + * additional analysis rounds until reaching the fixpoint, but I did not observe such cases. + * + * To benchmark an analysis, instead of benchmarking analysis while it runs in the compiler + * backend, one can easily run it from a separate program (or the repl). The bytecode to analyze + * can simply be parsed from a classfile. See example at the end of this comment. 
+ * + * + * Nullness Analysis in Miguel's Optimizer + * --------------------------------------- + * + * Miguel implemented alias tracking for nullness analysis differently [1]. Remember that every + * frame has an array of values. Miguel's idea was to represent aliasing using reference equality + * in the values array: if two entries in the array point to the same value object, the two entries + * are aliases in the frame of the given instruction. + * + * While this idea seems elegant at first sight, Miguel's implementation does not merge frames + * correctly when it comes to aliasing. Assume in frame 1, values (a, b, c) are aliases, while in + * frame 2 (a, b) are aliases. When merging the second into the first, we have to make sure that + * c is removed as an alias of (a, b). + * + * It would be possible to implement correct alias set merging in Miguel's approach. However, frame + * merging is the main hot spot of analysis. The computational complexity of implementing alias set + * merging by traversing the values array and comparing references is too high. The concrete + * alias set representation that is used in the current implementation (see class AliasingFrame) + * makes alias set merging more efficient. + * + * [1] https://github.com/scala-opt/scala/blob/opt/rebase/src/compiler/scala/tools/nsc/backend/bcode/NullnessPropagator.java + * + * + * Complexity and scaling of analysis + * ---------------------------------- + * + * The time complexity of a data flow analysis depends on: + * + * - The size of the method. The complexity factor is linear (assuming the number of locals and + * branching instructions remains constant). The main analysis loop runs through all + * instructions of a method once. Instructions are only re-enqueued if a control flow merge + * changes the frame at some instruction. + * + * - The branching instructions. When a second (third, ..) 
control flow edge arrives at an + * instruction, the existing frame at the instruction is merged with the one computed on the + * new branch. If the merge function changes the existing frame, the instruction is enqueued + * for another analysis. This results in a merge operation for the successors of the + * instruction. + * + * - The number of local variables. The hot spot of analysis is frame merging. The merge function + * iterates through the values in the frame (locals and stack values) and merges them. + * + * I measured the running time of an analysis for two examples: + * - Keep the number of locals and branching instructions constant, increase the number of + * instructions. The running time grows linearly with the method size. + * - Increase the size and number of locals in a method. The method size and number of locals + * grow in the same pace. Here, the running time increase is polynomial. It looks like the + * complexity is #instructions * #locals^2 (see below). + * + * I measured nullness analysis (which tracks aliases) and a SimpleValue analysis. Nullness runs + * roughly 5x slower (because of alias tracking) at every problem size - this factor doesn't change. + * + * The numbers below are for nullness. Note that the last column is constant, i.e., the running + * time is proportional to #ins * #loc^2. Therefore we use this factor when limiting the maximal + * method size for running an analysis. + * + * #insns #locals time (ms) time / #ins * #loc^2 * 10^6 + * 1305 156 34 1.07 + * 2610 311 165 0.65 + * 3915 466 490 0.57 + * 5220 621 1200 0.59 + * 6525 776 2220 0.56 + * 7830 931 3830 0.56 + * 9135 1086 6570 0.60 + * 10440 1241 9700 0.60 + * 11745 1396 13800 0.60 + * + * As a second experiment, nullness analysis was run with varying #insns but constant #locals. 
+ * The last column shows linear complexity with respect to the method size (linearOffset = 2279): + * + * #insns #locals time (ms) (time + linearOffset) / #insns + * 5220 621 1090 0.645 + * 6224 621 1690 0.637 + * 7226 621 2280 0.630 + * 8228 621 2870 0.625 + * 9230 621 3530 0.629 + * 10232 621 4130 0.626 + * 11234 621 4770 0.627 + * 12236 621 5520 0.637 + * 13238 621 6170 0.638 + * + * + * When running a BasicValue analysis, the complexity observation is the same (time is proportional + * to #ins * #loc^2). + * + * + * Measuring analysis execution time + * --------------------------------- + * + * See code below. + */ + +/* +object Test { + val overwrite: Option[String] = null + + @noinline def serialize(o: AnyRef): String = null + + @noinline def deserialize(string: String): AnyRef = null + + @inline def checkRoundTrip[T <: AnyRef](instance: T)(f: T => AnyRef) { + val result = serialize(instance) + val reconstituted = deserialize(result).asInstanceOf[T] + assert(f(instance) == f(reconstituted), (f(instance), f(reconstituted))) + } + + @inline def check[T <: AnyRef](instance: => T)(prevResult: String, f: T => AnyRef = (x: T) => x) { + // pattern match to introduce a lot of control flow, i.e., a lot of frame merges + overwrite match { + case Some(f) => + case None => + checkRoundTrip(instance)(f) + assert(f(deserialize(prevResult).asInstanceOf[T]) == f(instance), instance) + assert(prevResult == "res", instance) + } + } + + // @inline def fun[T <: AnyRef](instance: => T) = (x: T) => x + + def testMain(): Unit = { + // every call to check creates quite a number of locals, and also quite a number of aliases + // of the same value (x1). First of all, the default argument call is expanded as below. Then + // method check is inlined, and within the body of check, checkRoundTrip and assert have + // already been inlined as well. 
+ + // { + // val x1 = () => "" + // val x2 = fun(x1()) // the compiler optimizes this: instead of passing `() => x1()`, it just passes x1 + // check(x1())("", x2) // same here for x1 + // } + + check("")("") + check("")("") + check("")("") + check("")("") + check("")("") // 5 + check("")("") + check("")("") + check("")("") + check("")("") + check("")("") // 10 + check("")("") + check("")("") + check("")("") + check("")("") + check("")("") // 15 + check("")("") + check("")("") + check("")("") + check("")("") + check("")("") // 20 + check("")("") + check("")("") + check("")("") + check("")("") + check("")("") // 25 + check("")("") + check("")("") + check("")("") + check("")("") + check("")("") // 30 + check("")("") + check("")("") + check("")("") + check("")("") + check("")("") // 35 + check("")("") + check("")("") + check("")("") + check("")("") + check("")("") // 40 + // check("")("") + // check("")("") + // check("")("") + // check("")("") + // check("")("") // 45 + // check("")("") + // check("")("") + // check("")("") + // check("")("") + // check("")("") // 50 + // check("")("") + // check("")("") + // check("")("") + // check("")("") + // check("")("") // 55 + + // 1000 bytecode instructions, 0 locals + // println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); 
println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); println((1,2,3,4,5,6,7,8,9,10)); + } + + def timed[T](f: => T): T = { + val start = System.nanoTime() + val r = f + val nanos = System.nanoTime() - start + println(s"took ${nanos/1000000}ms") + r + } + + def main(args: Array[String]): Unit = { + import scala.tools.nsc.backend.jvm._ + val cn = AsmUtils.readClass("/Users/luc/scala/scala/sandbox/Test$.class") + import scala.collection.convert.decorateAsScala._ + val m = cn.methods.iterator.asScala.find(_.name == "testMain").head + + println(s"${m.instructions.size} instructions - ${m.maxLocals} locals") + + val a = new analysis.NullnessAnalyzer + a.analyze(cn.name, m) // warm up + + analysis.AliasingFrame.reset() + timed(a.analyze(cn.name, m)) + analysis.AliasingFrame.timers foreach println + + println("---") + + // NOTE: if we don't run nullness analysis above (comment it out), then the BasicValue + // analysis runs 3.5x faster. Most likely because the call to Interpreter.merge inside + // Frame.merge is no longer megamorphic. + + import scala.tools.asm.tree.analysis._ + val ba = new Analyzer(new BasicInterpreter) + ba.analyze(cn.name, m) // warm up + + timed(ba.analyze(cn.name, m)) + + println("---") + + timed(a.analyze(cn.name, m)) + } +} +*/ +package object analysis |