1 files changed, 262 insertions, 0 deletions
diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzer.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzer.scala
new file mode 100644
index 0000000000..18c17bc992
--- /dev/null
+++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/NullnessAnalyzer.scala
@@ -0,0 +1,262 @@
+package scala.tools.nsc
+package backend.jvm
+package analysis
+
+import java.util
+
+import scala.annotation.switch
+import scala.tools.asm.{Type, Opcodes}
+import scala.tools.asm.tree.{MethodInsnNode, LdcInsnNode, AbstractInsnNode}
+import scala.tools.asm.tree.analysis.{Frame, Analyzer, Interpreter, Value}
+import scala.tools.nsc.backend.jvm.opt.BytecodeUtils
+import BytecodeUtils._
+
+/**
+ * Some notes on the ASM ananlyzer framework.
+ *
+ * Value
+ *  - Abstract, needs to be implemented for each analysis.
+ *  - Represents the desired information about local variables and stack values, for example:
+ *    - Is this value known to be null / not null?
+ *    - What are the instructions that could potentially have produced this value?
+ *
+ * Interpreter
+ *  - Abstract, needs to be implemented for each analysis. Sometimes one can subclass an existing
+ *    interpreter, e.g., SourceInterpreter or BasicInterpreter.
+ *  - Multiple abstract methods that receive an instruction and the instruction's input values, and
+ *    return a value representing the result of that instruction.
+ *    - Note: due to control flow, the interpreter can be invoked multiple times for the same
+ *      instruction, until reaching a fixed point.
+ *  - Abstract `merge` function that computes the least upper bound of two values. Used by
+ *    Frame.merge (see below).
+ *
+ * Frame
+ *  - Can be used directly for many analyses, no subclass required.
+ *  - Every frame has an array of values: one for each local variable and for each stack slot.
+ *    - A `top` index stores the index of the current stack top
+ *    - NOTE: for a size-2 local variable at index i, the local variable at i+1 is set to an empty
+ *      value. However, for a size-2 value at index i on the stack, the value at i+1 holds the next
+ *      stack value.
+ *  - Defines the `execute(instruction)` method.
+ *    - executing mutates the state of the frame according to the effect of the instruction
+ *      - pop consumed values from the stack
+ *      - pass them to the interpreter together with the instruction
+ *      - if applicable, push the resulting value on the stack
+ *  - Defines the `merge(otherFrame)` method
+ *    - called by the analyzer when multiple control flow paths lead to an instruction
+ *      - the frame at the branching instruction is merged into the current frame of the
+ *        instruction (held by the analyzer)
+ *      - mutates the values of the current frame, merges all values using interpreter.merge.
+ *
+ * Analyzer
+ *   - Stores a frame for each instruction
+ *   - `merge` function takes an instruction and a frame, merges the existing frame for that instr
+ *     (from the frames array) with the new frame passed as argument.
+ *     if the frame changed, puts the instruction on the work queue (fixpiont).
+ *   - initial frame: initialized for first instr by calling interpreter.new[...]Value
+ *     for each slot (locals and params), stored in frames[firstInstr] by calling `merge`
+ *   - work queue of instructions (`queue` array, `top` index for next instruction to analyze)
+ *   - analyze(method): simulate control flow. while work queue non-empty:
+ *     - copy the state of `frames[instr]` into a local frame `current`
+ *     - call `current.execute(instr, interpreter)`, mutating the `current` frame
+ *     - if it's a branching instruction
+ *       - for all potential destination instructions
+ *         - merge the destination instruction frame with the `current` frame
+ *           (this enqueues the destination instr if its frame changed)
+ *       - invoke `newControlFlowEdge` (see below)
+ *   - the analyzer also tracks active exception handlers at each instruction
+ *   - the empty method `newControlFlowEdge` can be overridden to track control flow if required
+ *
+ *
+ * Some notes on nullness analysis.
+ *
+ * For an instance method, `this` is non-null at entry. So we have to return a NotNull value when
+ * the analyzer is initializing the first frame of a method (see above). This required a change of
+ * the analyzer: before it would simply call `interpreter.newValue`, where we don't have the
+ * required context. See https://github.com/scala/scala-asm/commit/8133d75032.
+ *
+ * After some operations we know that a certain value is not null (e.g. the receiver of an instance
+ * call). However, the receiver is an value on the stack and consumed while interpreting the
+ * instruction - so we can only gain some knowledge if we know that the receiver was an alias of
+ * some other local variable or stack slot. Therefore we use the AliasingFrame class.
+ *
+ * TODO:
+ * Finally, we'd also like to exploit the knowledge gained from `if (x == null)` tests: x is known
+ * to be null in one branch, not null in the other. This will make use of alias tracking as well.
+ * We still have to figure out how to do this exactly in the analyzer framework.
+ */
+
+/**
+ * Type to represent nullness of values.
+ */
+sealed trait Nullness {
+  final def merge(other: Nullness) = if (this == other) this else Unknown
+}
+case object NotNull extends Nullness
+case object Unknown extends Nullness
+case object Null    extends Nullness
+
+/**
+ * Represents the nullness state for a local variable or stack value.
+ *
+ * Note that nullness of primitive values is not tracked, it will be always [[Unknown]].
+ *
+ * @param nullness     The nullness of this value.
+ * @param longOrDouble True if this value is a long or double. The Analyzer framework needs to know
+ *                     the size of each value when interpreting instructions, see `Frame.execute`.
+ */
+final case class NullnessValue(nullness: Nullness, longOrDouble: Boolean) extends Value {
+  def this(nullness: Nullness, insn: AbstractInsnNode) = this(nullness, longOrDouble = BytecodeUtils.instructionResultSize(insn) == 2)
+
+  /**
+   * The size of the slot described by this value. Cannot be 0 because no values are allocated
+   * for void-typed slots, see NullnessInterpreter.newValue.
+   **/
+  def getSize: Int = if (longOrDouble) 2 else 1
+
+  def merge(other: NullnessValue) = NullnessValue(nullness merge other.nullness, longOrDouble)
+}
+
+object NullnessValue {
+  def apply(nullness: Nullness, insn: AbstractInsnNode) = new NullnessValue(nullness, insn)
+}
+
+final class NullnessInterpreter extends Interpreter[NullnessValue](Opcodes.ASM5) {
+  def newValue(tp: Type): NullnessValue = {
+    // ASM loves giving semantics to null. The behavior here is the same as in SourceInterpreter,
+    // which is provided by the framework.
+    //
+    // (1) For the void type, the ASM framework expects newValue to return `null`.
+    //     Also, the Frame.returnValue field is `null` for methods with return type void.
+    //     Example callsite passing VOID_TYPE: in Analyzer, `newValue(Type.getReturnType(m.desc))`.
+    //
+    // (2) `tp` may also be `null`. When creating the initial frame, the analyzer invokes
+    //     `newValue(null)` for each local variable. We have to return a value of size 1.
+    if (tp == Type.VOID_TYPE) null // (1)
+    else NullnessValue(Unknown, longOrDouble = tp != null /*(2)*/ && tp.getSize == 2 )
+  }
+
+  override def newParameterValue(isInstanceMethod: Boolean, local: Int, tp: Type): NullnessValue = {
+    // For instance methods, the `this` parameter is known to be not null.
+    if (isInstanceMethod && local == 0) NullnessValue(NotNull, longOrDouble = false)
+    else super.newParameterValue(isInstanceMethod, local, tp)
+  }
+
+  def newOperation(insn: AbstractInsnNode): NullnessValue = {
+    val nullness = (insn.getOpcode: @switch) match {
+      case Opcodes.ACONST_NULL => Null
+
+      case Opcodes.LDC => insn.asInstanceOf[LdcInsnNode].cst match {
+        case _: String | _: Type => NotNull
+        case _ => Unknown
+      }
+
+      case _ => Unknown
+    }
+
+    // for Opcodes.NEW, we use Unknown. The value will become NotNull after the constructor call.
+    NullnessValue(nullness, insn)
+  }
+
+  def copyOperation(insn: AbstractInsnNode, value: NullnessValue): NullnessValue = value
+
+  def unaryOperation(insn: AbstractInsnNode, value: NullnessValue): NullnessValue = (insn.getOpcode: @switch) match {
+    case Opcodes.NEWARRAY |
+         Opcodes.ANEWARRAY => NullnessValue(NotNull, longOrDouble = false)
+
+    case _ => NullnessValue(Unknown, insn)
+  }
+
+  def binaryOperation(insn: AbstractInsnNode, value1: NullnessValue, value2: NullnessValue): NullnessValue = {
+    NullnessValue(Unknown, insn)
+  }
+
+  def ternaryOperation(insn: AbstractInsnNode, value1: NullnessValue, value2: NullnessValue, value3: NullnessValue): NullnessValue = {
+    NullnessValue(Unknown, longOrDouble = false)
+  }
+
+  def naryOperation(insn: AbstractInsnNode, values: util.List[_ <: NullnessValue]): NullnessValue = (insn.getOpcode: @switch) match {
+    case Opcodes.MULTIANEWARRAY =>
+      NullnessValue(NotNull, longOrDouble = false)
+
+    case _ =>
+      // TODO: use a list of methods that are known to return non-null values
+      NullnessValue(Unknown, insn)
+  }
+
+  def returnOperation(insn: AbstractInsnNode, value: NullnessValue, expected: NullnessValue): Unit = ()
+
+  def merge(a: NullnessValue, b: NullnessValue): NullnessValue = a merge b
+}
+
+class NullnessFrame(nLocals: Int, nStack: Int) extends AliasingFrame[NullnessValue](nLocals, nStack) {
+  // Auxiliary constructor required for implementing `NullnessAnalyzer.newFrame`
+  def this(src: Frame[_ <: NullnessValue]) {
+    this(src.getLocals, src.getMaxStackSize)
+    init(src)
+  }
+
+  override def execute(insn: AbstractInsnNode, interpreter: Interpreter[NullnessValue]): Unit = {
+    import Opcodes._
+
+    // get the object id of the object that is known to be not-null after this operation
+    val nullCheckedAliasId: Long = (insn.getOpcode: @switch) match {
+      case IALOAD |
+           LALOAD |
+           FALOAD |
+           DALOAD |
+           AALOAD |
+           BALOAD |
+           CALOAD |
+           SALOAD =>
+        aliasId(this.stackTop - 1)
+
+      case IASTORE |
+           FASTORE |
+           AASTORE |
+           BASTORE |
+           CASTORE |
+           SASTORE |
+           LASTORE |
+           DASTORE =>
+        aliasId(this.stackTop - 2)
+
+      case GETFIELD =>
+        aliasId(this.stackTop)
+
+      case PUTFIELD =>
+        aliasId(this.stackTop - 1)
+
+      case INVOKEVIRTUAL |
+           INVOKESPECIAL |
+           INVOKEINTERFACE =>
+        val desc = insn.asInstanceOf[MethodInsnNode].desc
+        val numArgs = Type.getArgumentTypes(desc).length
+        aliasId(this.stackTop - numArgs)
+
+      case ARRAYLENGTH |
+           MONITORENTER |
+           MONITOREXIT =>
+        aliasId(this.stackTop)
+
+      case _ =>
+        -1
+    }
+
+    super.execute(insn, interpreter)
+
+    if (nullCheckedAliasId != -1) {
+      for (i <- valuesWithAliasId(nullCheckedAliasId))
+        this.setValue(i, this.getValue(i).copy(nullness = NotNull))
+    }
+  }
+}
+
+/**
+ * This class is required to override the `newFrame` methods, which makes makes sure the analyzer
+ * uses NullnessFrames.
+ */
+class NullnessAnalyzer extends Analyzer[NullnessValue](new NullnessInterpreter) {
+  override def newFrame(nLocals: Int, nStack: Int): NullnessFrame = new NullnessFrame(nLocals, nStack)
+  override def newFrame(src: Frame[_ <: NullnessValue]): NullnessFrame = new NullnessFrame(src)
+}