From 3a9581d32f9d3adb1dcb0b9c4bfeb9c86f0addcf Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Thu, 22 Oct 2015 18:32:19 +0200 Subject: More efficient way to compute maxLocals / maxStack In order to run an asm Analyzer, the maxLocals / maxStack values of the method need to be computed. Asm doesn't provide an efficient built-in for this purpose, but it computes these values while serializing a class. Previously, we used to serialize the method just to compute the max's, which is inefficient. This commit implements a separate, efficient traversal. The computed values are also smaller, allowing to save space when running an Analyzer: asm Analyzers only allocate a single stack slot for long/double values, while the JVM allocates two. The maxStack computed previously would always use two slots, which is not necessary. The new calculation was verified to be correct in the following way: as a test, i left the old computation in place, ran the new one in addition (in a special mode where the long/double values take two slots) and asserted equality. Bootstrapping and test suite passed. --- .../nsc/backend/jvm/analysis/AliasingFrame.scala | 2 +- .../nsc/backend/jvm/analysis/BackendUtils.scala | 130 +++++++++- .../jvm/analysis/InstructionStackEffect.scala | 266 ++++++++++++++------- .../jvm/analysis/ProdConsAnalyzerImpl.scala | 4 +- .../tools/nsc/backend/jvm/opt/CallGraph.scala | 159 ++++++------ .../scala/tools/nsc/backend/jvm/opt/LocalOpt.scala | 34 +-- 6 files changed, 386 insertions(+), 209 deletions(-) diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala index 9e5fbfcc0e..337f7787bd 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala @@ -93,7 +93,7 @@ class AliasingFrame[V <: Value](nLocals: Int, nStack: Int) extends Frame[V](nLoc def peekStack(n: Int): V = this.peekStack(n) // the val pattern `val (p, c) = f` still allocates a tuple (https://github.com/scala-opt/scala/issues/28) - val prodCons = InstructionStackEffect(insn, this) // needs to be called before super.execute, see its doc + val prodCons = InstructionStackEffect.forAsmAnalysis(insn, this) // needs to be called before super.execute, see its doc val consumed = prodCons._1 val produced = prodCons._2 diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/BackendUtils.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/BackendUtils.scala index 1da32bc7a8..aeb0f41237 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/BackendUtils.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/BackendUtils.scala @@ -3,7 +3,7 @@ package backend.jvm package analysis import scala.annotation.switch -import scala.tools.asm.{Handle, Type, Label} +import scala.tools.asm.{Opcodes, Handle, Type, Label} import scala.tools.asm.tree._ import scala.tools.asm.tree.analysis.{Frame, BasicInterpreter, Analyzer, Value} import scala.tools.nsc.backend.jvm.BTypes._ @@ -28,7 +28,7 @@ class BackendUtils[BT <: BTypes](val btypes: BT) { * A wrapper to make ASM's Analyzer a bit easier to use. */ class AsmAnalyzer[V <: Value](methodNode: MethodNode, classInternalName: InternalName, val analyzer: Analyzer[V] = new Analyzer(new BasicInterpreter)) { - localOpt.computeMaxLocalsMaxStack(methodNode) + computeMaxLocalsMaxStack(methodNode) analyzer.analyze(classInternalName, methodNode) def frameAt(instruction: AbstractInsnNode): Frame[V] = analyzer.frameAt(instruction, methodNode) } @@ -256,4 +256,130 @@ class BackendUtils[BT <: BTypes](val btypes: BT) { } innerClasses.toList } + + /** + * In order to run an Analyzer, the maxLocals / maxStack fields need to be available. The ASM + * framework only computes these values during bytecode generation. + * + * NOTE 1: as explained in the `analysis` package object, the maxStack value used by the Analyzer + * may be smaller than the correct maxStack value in the classfile (Analyzers only use a single + * slot for long / double values). The maxStack computed here are correct for running an analyzer, + * but not for writing in the classfile. We let the ClassWriter recompute max's. + * + * NOTE 2: the maxStack value computed here may be larger than the smallest correct value + * that would allow running an analyzer, see `InstructionStackEffect.forAsmAnalysisConservative`. + * + * NOTE 3: the implementation doesn't look at instructions that cannot be reached, it computes + * the max local / stack size in the reachable code. These max's work just fine for running an + * Analyzer: its implementation also skips over unreachable code in the same way. + */ + def computeMaxLocalsMaxStack(method: MethodNode): Unit = { + import Opcodes._ + + if (isAbstractMethod(method) || isNativeMethod(method)) { + method.maxLocals = 0 + method.maxStack = 0 + } else if (!maxLocalsMaxStackComputed(method)) { + val size = method.instructions.size + + var maxLocals = (Type.getArgumentsAndReturnSizes(method.desc) >> 2) - (if (isStaticMethod(method)) 1 else 0) + var maxStack = 0 + + // queue of instruction indices where analysis should start + var queue = new Array[Int](8) + var top = -1 + def enq(i: Int): Unit = { + if (top == queue.length - 1) { + val nq = new Array[Int](queue.length * 2) + Array.copy(queue, 0, nq, 0, queue.length) + queue = nq + } + top += 1 + queue(top) = i + } + def deq(): Int = { + val r = queue(top) + top -= 1 + r + } + + // for each instruction in the queue, contains the stack height at this instruction. + // once an instruction has been treated, contains -1 to prevent re-enqueuing + val stackHeights = new Array[Int](size) + + def enqInsn(insn: AbstractInsnNode, height: Int): Unit = { + enqInsnIndex(method.instructions.indexOf(insn), height) + } + + def enqInsnIndex(insnIndex: Int, height: Int): Unit = { + if (insnIndex < size && stackHeights(insnIndex) != -1) { + stackHeights(insnIndex) = height + enq(insnIndex) + } + } + + val tcbIt = method.tryCatchBlocks.iterator() + while (tcbIt.hasNext) { + val tcb = tcbIt.next() + enqInsn(tcb.handler, 1) + if (maxStack == 0) maxStack = 1 + } + + enq(0) + while (top != -1) { + val insnIndex = deq() + val insn = method.instructions.get(insnIndex) + val initHeight = stackHeights(insnIndex) + stackHeights(insnIndex) = -1 // prevent i from being enqueued again + + if (insn.getOpcode == -1) { // frames, labels, line numbers + enqInsnIndex(insnIndex + 1, initHeight) + } else { + val (cons, prod) = InstructionStackEffect.forAsmAnalysisConservative(insn) + val heightAfter = initHeight - cons + prod + if (heightAfter > maxStack) maxStack = heightAfter + + // update maxLocals + insn match { + case v: VarInsnNode => + val longSize = if (isSize2LoadOrStore(v.getOpcode)) 1 else 0 + maxLocals = math.max(maxLocals, v.`var` + longSize + 1) // + 1 becauase local numbers are 0-based + + case i: IincInsnNode => + maxLocals = math.max(maxLocals, i.`var` + 1) + + case _ => + } + + insn match { + case j: JumpInsnNode => + enqInsn(j.label, heightAfter) + val opc = j.getOpcode + if (opc != GOTO) enqInsnIndex(insnIndex + 1, heightAfter) // jump is conditional, so the successor is also a possible control flow target + case l: LookupSwitchInsnNode => + var j = 0 + while (j < l.labels.size) { + enqInsn(l.labels.get(j), heightAfter); j += 1 + } + enqInsn(l.dflt, heightAfter) + case t: TableSwitchInsnNode => + var j = 0 + while (j < t.labels.size) { + enqInsn(t.labels.get(j), heightAfter); j += 1 + } + enqInsn(t.dflt, heightAfter) + case _ => + val opc = insn.getOpcode + if (opc != ATHROW && !isReturn(insn)) + enqInsnIndex(insnIndex + 1, heightAfter) + } + } + } + + method.maxLocals = maxLocals + method.maxStack = maxStack + + maxLocalsMaxStackComputed += method + } + } } diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/InstructionStackEffect.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/InstructionStackEffect.scala index 8d8ea839e6..1f81736a5d 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/InstructionStackEffect.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/InstructionStackEffect.scala @@ -5,35 +5,84 @@ package analysis import scala.annotation.switch import scala.tools.asm.Opcodes._ import scala.tools.asm.Type -import scala.tools.asm.tree.{MultiANewArrayInsnNode, InvokeDynamicInsnNode, MethodInsnNode, AbstractInsnNode} +import scala.tools.asm.tree._ import scala.tools.asm.tree.analysis.{Frame, Value} import opt.BytecodeUtils._ -import collection.immutable object InstructionStackEffect { - private var cache: immutable.IntMap[(Int, Int)] = immutable.IntMap.empty + // x up to 10, which covers most cases and limits the cache. y doesn't go above 6 (see cases). + val maxCachedX = 10 + val maxCachedY = 6 + val xShift = 3 + private val cache = new Array[(Int, Int)]((maxCachedX << xShift) + maxCachedY + 1) private def t(x: Int, y: Int): (Int, Int) = { - // x can go up to 255 (number of parameters of a method, dimensions in multianewarray) we cache - // x up to 10, which covers most cases and limits the cache. y doesn't go above 6 (see cases). - if (x > 10 || y > 6) (x, y) + if (x > maxCachedX || y > maxCachedY) (x, y) else { - val key = (x << 8) + y // this would work for any x < 256 - if (cache contains key) { + val key = (x << xShift) + y + if (cache(key) != null) { cache(key) } else { val r = (x, y) - cache += key -> r + cache(key) = r r } } } /** - * Returns a pair with the number of stack values consumed and produced by `insn`. - * This method requires the `frame` to be in the state **before** executing / interpreting - * the `insn`. + * Returns a pair with the number of stack values consumed and produced by `insn`. The returned + * values are correct for use in asm's Analyzer framework. For example, a LLOAD instruction + * produces one stack value. See also doc in `analysis` package object. + * + * This method requires the `frame` to be in the state **before** executing / interpreting the + * `insn`. */ - def apply[V <: Value](insn: AbstractInsnNode, frame: Frame[V]): (Int, Int) = { + def forAsmAnalysis[V <: Value](insn: AbstractInsnNode, frame: Frame[V]): (Int, Int) = computeConsProd(insn, frame = frame) + + /** + * Returns a pair with the number of stack values consumed and produced by `insn`. The returned + * values are usually the same as used in asm's Analyzer framework, but they may be larger. For + * example, consider a POP2 instruction: + * - if two size-1 values are popped, then the asm Analyzer consumes two values + * - if a size-2 value is popped, the asm Analyzer consumes only one stack slot (see doc in the + * `analysis` package object + * + * If a precise result is needed, invoke the `forAsmAnalysis` and provide a `frame` value that + * allows looking up the sizes of values on the stack. + * + * When the precise stack effect is unknown, this method returns values that are safe for + * computing an upper bound on the stack size for running an Analyzer: (prod - cons) is the + * largest possible value for any input sizes. + */ + def forAsmAnalysisConservative(insn: AbstractInsnNode): (Int, Int) = computeConsProd(insn, conservative = true) + + /** + * Returns a pair with the number of stack values consumed and produced by `insn`. The returned + * values are correct for writing into a classfile (see doc on `analysis` package object). + */ + def forClassfile(insn: AbstractInsnNode): (Int, Int) = computeConsProd(insn, forClassfile = true) + + private def invokeConsProd(methodDesc: String, insn: AbstractInsnNode, forClassfile: Boolean): (Int, Int) = { + val consumesReceiver = insn.getOpcode != INVOKESTATIC && insn.getOpcode != INVOKEDYNAMIC + if (forClassfile) { + val sizes = Type.getArgumentsAndReturnSizes(methodDesc) + val cons = (sizes >> 2) - (if (consumesReceiver) 0 else 1) + val prod = sizes & 0x03 + t(cons, prod) + } else { + val cons = Type.getArgumentTypes(methodDesc).length + (if (consumesReceiver) 1 else 0) + val prod = if (Type.getReturnType(methodDesc) == Type.VOID_TYPE) 0 else 1 + t(cons, prod) + } + } + + private def fieldInsnIsLongOrDouble(insn: AbstractInsnNode) = { + val d = insn.asInstanceOf[FieldInsnNode].desc + d == "J" || d == "D" + } + + private def computeConsProd[V <: Value](insn: AbstractInsnNode, forClassfile: Boolean = false, conservative: Boolean = false, frame: Frame[V] = null): (Int, Int) = { + // not used if `forClassfile || conservative`: in these cases, `frame` is allowed to be `null` def peekStack(n: Int): V = frame.peekStack(n) (insn.getOpcode: @switch) match { @@ -48,142 +97,176 @@ object InstructionStackEffect { ICONST_3 | ICONST_4 | ICONST_5 | - LCONST_0 | - LCONST_1 | FCONST_0 | FCONST_1 | FCONST_2 | - DCONST_0 | - DCONST_1 | BIPUSH | SIPUSH | - LDC | ILOAD | - LLOAD | FLOAD | - DLOAD | ALOAD => t(0, 1) + case LDC => + if (forClassfile) insn.asInstanceOf[LdcInsnNode].cst match { + case _: java.lang.Long | _: java.lang.Double => t(0, 2) + case _ => t(0, 1) + } else + t(0, 1) + + case LCONST_0 | + LCONST_1 | + DCONST_0 | + DCONST_1 | + LLOAD | + DLOAD => if (forClassfile) t(0, 2) else t(0, 1) + case IALOAD | - LALOAD | FALOAD | - DALOAD | AALOAD | BALOAD | CALOAD | SALOAD => t(2, 1) + case LALOAD | + DALOAD => if (forClassfile) t(2, 2) else t(2, 1) + case ISTORE | - LSTORE | FSTORE | - DSTORE | ASTORE => t(1, 0) + case LSTORE | + DSTORE => if (forClassfile) t(2, 0) else t(1, 0) + case IASTORE | - LASTORE | FASTORE | - DASTORE | AASTORE | BASTORE | CASTORE | SASTORE => t(3, 0) + case LASTORE | + DASTORE => if (forClassfile) t(4, 0) else t(3, 0) + case POP => t(1, 0) case POP2 => - val isSize2 = peekStack(0).getSize == 2 - if (isSize2) t(1, 0) else t(2, 0) + if (forClassfile) t(2, 0) + else if (conservative) t(1, 0) + else { + val isSize2 = peekStack(0).getSize == 2 + if (isSize2) t(1, 0) else t(2, 0) + } case DUP => t(1, 2) case DUP_X1 => t(2, 3) case DUP_X2 => - val isSize2 = peekStack(1).getSize == 2 - if (isSize2) t(2, 3) else t(3, 4) + if (forClassfile || conservative) t(3, 4) + else { + val isSize2 = peekStack(1).getSize == 2 + if (isSize2) t(2, 3) else t(3, 4) + } case DUP2 => - val isSize2 = peekStack(0).getSize == 2 - if (isSize2) t(1, 2) else t(2, 4) + if (forClassfile || conservative) t(2, 4) + else { + val isSize2 = peekStack(0).getSize == 2 + if (isSize2) t(1, 2) else t(2, 4) + } case DUP2_X1 => - val isSize2 = peekStack(0).getSize == 2 - if (isSize2) t(2, 3) else t(3, 4) + if (forClassfile || conservative) t(3, 5) + else { + val isSize2 = peekStack(0).getSize == 2 + if (isSize2) t(2, 3) else t(3, 5) + } case DUP2_X2 => - val v1isSize2 = peekStack(0).getSize == 2 - if (v1isSize2) { - val v2isSize2 = peekStack(1).getSize == 2 - if (v2isSize2) t(2, 3) else t(3, 4) - } else { - val v3isSize2 = peekStack(2).getSize == 2 - if (v3isSize2) t(3, 5) else t(4, 6) + if (forClassfile || conservative) t(4, 6) + else { + val v1isSize2 = peekStack(0).getSize == 2 + if (v1isSize2) { + val v2isSize2 = peekStack(1).getSize == 2 + if (v2isSize2) t(2, 3) else t(3, 4) + } else { + val v3isSize2 = peekStack(2).getSize == 2 + if (v3isSize2) t(3, 5) else t(4, 6) + } } case SWAP => t(2, 2) case IADD | - LADD | FADD | - DADD | ISUB | - LSUB | FSUB | - DSUB | IMUL | - LMUL | FMUL | - DMUL | IDIV | - LDIV | FDIV | - DDIV | IREM | + FREM => t(2, 1) + + case LADD | + DADD | + LSUB | + DSUB | + LMUL | + DMUL | + LDIV | + DDIV | LREM | - FREM | - DREM => t(2, 1) + DREM => if (forClassfile) t(4, 2) else t(2, 1) case INEG | - LNEG | - FNEG | - DNEG => t(1, 1) + FNEG => t(1, 1) + + case LNEG | + DNEG => if (forClassfile) t(2, 2) else t(1, 1) case ISHL | - LSHL | ISHR | - LSHR | IUSHR | - LUSHR | IAND | - LAND | IOR | + IXOR => t(2, 1) + + case LSHL | + LSHR | + LUSHR => if (forClassfile) t(3, 2) else t(2, 1) + + case LAND | LOR | - IXOR | - LXOR => t(2, 1) + LXOR => if (forClassfile) t(4, 2) else t(2, 1) case IINC => t(0, 0) - case I2L | - I2F | - I2D | - L2I | - L2F | - L2D | + case I2F | F2I | - F2L | - F2D | - D2I | - D2L | - D2F | I2B | I2C | I2S => t(1, 1) + case I2L | + I2D | + F2L | + F2D => if (forClassfile) t(1, 2) else t(1, 1) + + case L2I | + L2F | + D2I | + D2F => if (forClassfile) t(2, 1) else t(1, 1) + + case L2D | + D2L => if (forClassfile) t(2, 2) else t(1, 1) + + case FCMPL | + FCMPG => t(2, 1) + case LCMP | - FCMPL | - FCMPG | DCMPL | - DCMPG => t(2, 1) + DCMPG => if (forClassfile) t(4, 1) else t(2, 1) case IFEQ | IFNE | @@ -211,35 +294,36 @@ object InstructionStackEffect { LOOKUPSWITCH => t(1, 0) case IRETURN | - LRETURN | FRETURN | - DRETURN | ARETURN => t(1, 0) // Frame.execute consumes one stack value + case LRETURN | + DRETURN => if (forClassfile) t(2, 0) else t(1, 0) + case RETURN => t(0, 0) // Frame.execute does not change the stack - case GETSTATIC => t(0, 1) + case GETSTATIC => + val prod = if (forClassfile && fieldInsnIsLongOrDouble(insn)) 2 else 1 + t(0, prod) - case PUTSTATIC => t(1, 0) + case PUTSTATIC => + val cons = if (forClassfile && fieldInsnIsLongOrDouble(insn)) 2 else 1 + t(cons, 0) - case GETFIELD => t(1, 1) + case GETFIELD => + val prod = if (forClassfile && fieldInsnIsLongOrDouble(insn)) 2 else 1 + t(1, prod) - case PUTFIELD => t(2, 0) + case PUTFIELD => + val cons = if (forClassfile && fieldInsnIsLongOrDouble(insn)) 3 else 2 + t(cons, 0) case INVOKEVIRTUAL | INVOKESPECIAL | INVOKESTATIC | - INVOKEINTERFACE => - val desc = insn.asInstanceOf[MethodInsnNode].desc - val cons = Type.getArgumentTypes(desc).length + (if (insn.getOpcode == INVOKESTATIC) 0 else 1) - val prod = if (Type.getReturnType(desc) == Type.VOID_TYPE) 0 else 1 - t(cons, prod) - - case INVOKEDYNAMIC => - val desc = insn.asInstanceOf[InvokeDynamicInsnNode].desc - val cons = Type.getArgumentTypes(desc).length - val prod = if (Type.getReturnType(desc) == Type.VOID_TYPE) 0 else 1 - t(cons, prod) + INVOKEINTERFACE => invokeConsProd(insn.asInstanceOf[MethodInsnNode].desc, insn, forClassfile) + + case INVOKEDYNAMIC => invokeConsProd(insn.asInstanceOf[InvokeDynamicInsnNode].desc, insn, forClassfile) case NEW => t(0, 1) diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzerImpl.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzerImpl.scala index 242171476a..33bbbe5728 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzerImpl.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzerImpl.scala @@ -368,7 +368,7 @@ trait ProdConsAnalyzerImpl { Seq(insn.asInstanceOf[IincInsnNode].`var`) } else { val frame = frameAt(insn) - val stackEffect = InstructionStackEffect(insn, frame) + val stackEffect = InstructionStackEffect.forAsmAnalysis(insn, frame) val stackSize = frame.getLocals + frame.getStackSize (stackSize - stackEffect._1) until stackSize } @@ -387,7 +387,7 @@ trait ProdConsAnalyzerImpl { Seq(insn.asInstanceOf[IincInsnNode].`var`) } else { val frame = frameAt(insn) - val stackEffect = InstructionStackEffect(insn, frame) + val stackEffect = InstructionStackEffect.forAsmAnalysis(insn, frame) val nextFrame = frameAt(insn.getNext) val stackSize = nextFrame.getLocals + nextFrame.getStackSize (stackSize - stackEffect._2) until stackSize diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala index 66810176a1..b192e1b46a 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/CallGraph.scala @@ -91,95 +91,94 @@ class CallGraph[BT <: BTypes](val btypes: BT) { if (!callsites.contains(methodNode)) addMethod(methodNode, definingClass) } - /** - * Returns a list of callsites in the method, plus a list of closure instantiation indy instructions. - */ def addMethod(methodNode: MethodNode, definingClass: ClassBType): Unit = { - // TODO: run dataflow analyses to make the call graph more precise - // - producers to get forwarded parameters (ForwardedParam) - // - typeAnalysis for more precise argument types, more precise callee - - // For now we run a NullnessAnalyzer. It is used to determine if the receiver of an instance - // call is known to be not-null, in which case we don't have to emit a null check when inlining. - // It is also used to get the stack height at the call site. - - val analyzer = { - if (compilerSettings.YoptNullnessTracking && AsmAnalyzer.sizeOKForNullness(methodNode)) { - Some(new AsmAnalyzer(methodNode, definingClass.internalName, new NullnessAnalyzer)) - } else if (AsmAnalyzer.sizeOKForBasicValue(methodNode)) { - Some(new AsmAnalyzer(methodNode, definingClass.internalName)) - } else None - } - - // if the method is too large to run an analyzer, it is not added to the call graph - if (analyzer.nonEmpty) { - val Some(a) = analyzer - def receiverNotNullByAnalysis(call: MethodInsnNode, numArgs: Int) = a.analyzer match { - case nullnessAnalyzer: NullnessAnalyzer => - val frame = nullnessAnalyzer.frameAt(call, methodNode) - frame.getStack(frame.getStackSize - 1 - numArgs) eq NotNullValue - case _ => false + if (!BytecodeUtils.isAbstractMethod(methodNode) && !BytecodeUtils.isNativeMethod(methodNode)) { + // TODO: run dataflow analyses to make the call graph more precise + // - producers to get forwarded parameters (ForwardedParam) + // - typeAnalysis for more precise argument types, more precise callee + + // For now we run a NullnessAnalyzer. It is used to determine if the receiver of an instance + // call is known to be not-null, in which case we don't have to emit a null check when inlining. + // It is also used to get the stack height at the call site. + + val analyzer = { + if (compilerSettings.YoptNullnessTracking && AsmAnalyzer.sizeOKForNullness(methodNode)) { + Some(new AsmAnalyzer(methodNode, definingClass.internalName, new NullnessAnalyzer)) + } else if (AsmAnalyzer.sizeOKForBasicValue(methodNode)) { + Some(new AsmAnalyzer(methodNode, definingClass.internalName)) + } else None } - var methodCallsites = Map.empty[MethodInsnNode, Callsite] - var methodClosureInstantiations = Map.empty[InvokeDynamicInsnNode, ClosureInstantiation] - - // lazy so it is only computed if actually used by computeArgInfos - lazy val prodCons = new ProdConsAnalyzer(methodNode, definingClass.internalName) - - methodNode.instructions.iterator.asScala foreach { - case call: MethodInsnNode if a.frameAt(call) != null => // skips over unreachable code - val callee: Either[OptimizerWarning, Callee] = for { - (method, declarationClass) <- byteCodeRepository.methodNode(call.owner, call.name, call.desc): Either[OptimizerWarning, (MethodNode, InternalName)] - (declarationClassNode, source) <- byteCodeRepository.classNodeAndSource(declarationClass): Either[OptimizerWarning, (ClassNode, Source)] - } yield { - val declarationClassBType = classBTypeFromClassNode(declarationClassNode) - val CallsiteInfo(safeToInline, safeToRewrite, annotatedInline, annotatedNoInline, samParamTypes, warning) = analyzeCallsite(method, declarationClassBType, call.owner, source) - Callee( - callee = method, - calleeDeclarationClass = declarationClassBType, - safeToInline = safeToInline, - safeToRewrite = safeToRewrite, - annotatedInline = annotatedInline, - annotatedNoInline = annotatedNoInline, - samParamTypes = samParamTypes, - calleeInfoWarning = warning) + // if the method is too large to run an analyzer, it is not added to the call graph + if (analyzer.nonEmpty) { + val Some(a) = analyzer + def receiverNotNullByAnalysis(call: MethodInsnNode, numArgs: Int) = a.analyzer match { + case nullnessAnalyzer: NullnessAnalyzer => + val frame = nullnessAnalyzer.frameAt(call, methodNode) + frame.getStack(frame.getStackSize - 1 - numArgs) eq NotNullValue + case _ => false + } + + var methodCallsites = Map.empty[MethodInsnNode, Callsite] + var methodClosureInstantiations = Map.empty[InvokeDynamicInsnNode, ClosureInstantiation] + + // lazy so it is only computed if actually used by computeArgInfos + lazy val prodCons = new ProdConsAnalyzer(methodNode, definingClass.internalName) + + methodNode.instructions.iterator.asScala foreach { + case call: MethodInsnNode if a.frameAt(call) != null => // skips over unreachable code + val callee: Either[OptimizerWarning, Callee] = for { + (method, declarationClass) <- byteCodeRepository.methodNode(call.owner, call.name, call.desc): Either[OptimizerWarning, (MethodNode, InternalName)] + (declarationClassNode, source) <- byteCodeRepository.classNodeAndSource(declarationClass): Either[OptimizerWarning, (ClassNode, Source)] + } yield { + val declarationClassBType = classBTypeFromClassNode(declarationClassNode) + val CallsiteInfo(safeToInline, safeToRewrite, annotatedInline, annotatedNoInline, samParamTypes, warning) = analyzeCallsite(method, declarationClassBType, call.owner, source) + Callee( + callee = method, + calleeDeclarationClass = declarationClassBType, + safeToInline = safeToInline, + safeToRewrite = safeToRewrite, + annotatedInline = annotatedInline, + annotatedNoInline = annotatedNoInline, + samParamTypes = samParamTypes, + calleeInfoWarning = warning) + } + + val argInfos = computeArgInfos(callee, call, prodCons) + + val receiverNotNull = call.getOpcode == Opcodes.INVOKESTATIC || { + val numArgs = Type.getArgumentTypes(call.desc).length + receiverNotNullByAnalysis(call, numArgs) } - val argInfos = computeArgInfos(callee, call, prodCons) + methodCallsites += call -> Callsite( + callsiteInstruction = call, + callsiteMethod = methodNode, + callsiteClass = definingClass, + callee = callee, + argInfos = argInfos, + callsiteStackHeight = a.frameAt(call).getStackSize, + receiverKnownNotNull = receiverNotNull, + callsitePosition = callsitePositions.getOrElse(call, NoPosition), + annotatedInline = inlineAnnotatedCallsites(call), + annotatedNoInline = noInlineAnnotatedCallsites(call) + ) - val receiverNotNull = call.getOpcode == Opcodes.INVOKESTATIC || { - val numArgs = Type.getArgumentTypes(call.desc).length - receiverNotNullByAnalysis(call, numArgs) - } + case LambdaMetaFactoryCall(indy, samMethodType, implMethod, instantiatedMethodType) if a.frameAt(indy) != null => + val lmf = LambdaMetaFactoryCall(indy, samMethodType, implMethod, instantiatedMethodType) + val capturedArgInfos = computeCapturedArgInfos(lmf, prodCons) + methodClosureInstantiations += indy -> ClosureInstantiation( + lmf, + methodNode, + definingClass, + capturedArgInfos) - methodCallsites += call -> Callsite( - callsiteInstruction = call, - callsiteMethod = methodNode, - callsiteClass = definingClass, - callee = callee, - argInfos = argInfos, - callsiteStackHeight = a.frameAt(call).getStackSize, - receiverKnownNotNull = receiverNotNull, - callsitePosition = callsitePositions.getOrElse(call, NoPosition), - annotatedInline = inlineAnnotatedCallsites(call), - annotatedNoInline = noInlineAnnotatedCallsites(call) - ) - - case LambdaMetaFactoryCall(indy, samMethodType, implMethod, instantiatedMethodType) if a.frameAt(indy) != null => - val lmf = LambdaMetaFactoryCall(indy, samMethodType, implMethod, instantiatedMethodType) - val capturedArgInfos = computeCapturedArgInfos(lmf, prodCons) - methodClosureInstantiations += indy -> ClosureInstantiation( - lmf, - methodNode, - definingClass, - capturedArgInfos) + case _ => + } - case _ => + callsites(methodNode) = methodCallsites + closureInstantiations(methodNode) = methodClosureInstantiations } - - callsites(methodNode) = methodCallsites - closureInstantiations(methodNode) = methodClosureInstantiations } } diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala index 38f3c51892..a80b3d0487 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/LocalOpt.scala @@ -50,38 +50,6 @@ class LocalOpt[BT <: BTypes](val btypes: BT) { import btypes._ import backendUtils._ - /** - * In order to run an Analyzer, the maxLocals / maxStack fields need to be available. The ASM - * framework only computes these values during bytecode generation. - * - * Since there's currently no better way, we run a bytecode generator on the method and extract - * the computed values. This required changes to the ASM codebase: - * - the [[MethodWriter]] class was made public - * - accessors for maxLocals / maxStack were added to the MethodWriter class - * - * We could probably make this faster (and allocate less memory) by hacking the ASM framework - * more: create a subclass of MethodWriter with a /dev/null byteVector. Another option would be - * to create a separate visitor for computing those values, duplicating the functionality from the - * MethodWriter. - * - * NOTE: the maxStack value computed by this method allocates two slots for long / double values, - * as required by the JVM spec. For running an Analyzer, one slot per long / double would be fine. - * See comment in `analysis` package object. - */ - def computeMaxLocalsMaxStack(method: MethodNode): Unit = { - if (!maxLocalsMaxStackComputed(method)) { - method.maxLocals = 0 - method.maxStack = 0 - val cw = new ClassWriter(ClassWriter.COMPUTE_MAXS) - val excs = method.exceptions.asScala.toArray - val mw = cw.visitMethod(method.access, method.name, method.desc, method.signature, excs).asInstanceOf[MethodWriter] - method.accept(mw) - method.maxLocals = mw.getMaxLocals - method.maxStack = mw.getMaxStack - maxLocalsMaxStackComputed += method - } - } - /** * Remove unreachable code from a method. * @@ -225,7 +193,7 @@ class LocalOpt[BT <: BTypes](val btypes: BT) { var i = 0 var liveLabels = Set.empty[LabelNode] var removedInstructions = Set.empty[AbstractInsnNode] - var maxLocals = Type.getArgumentsAndReturnSizes(method.desc) >> 2 - (if (BytecodeUtils.isStaticMethod(method)) 1 else 0) + var maxLocals = (Type.getArgumentsAndReturnSizes(method.desc) >> 2) - (if (BytecodeUtils.isStaticMethod(method)) 1 else 0) var maxStack = 0 val itr = method.instructions.iterator() while (itr.hasNext) { -- cgit v1.2.3 From 4e994933c4b40a90555fe8acbe630f1c2fd03f55 Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Tue, 27 Oct 2015 10:39:54 +0100 Subject: Support JSR / RET in computeMaxLocalsMaxStack Even though the two bytecodes are not allowed in classfiles of version 51+ (see [1]), we could encounter them when inlining from a JAR file containing classfiles of older version. [1] https://docs.oracle.com/javase/specs/jvms/se8/html/jvms-4.html#jvms-4.9.1 --- .../nsc/backend/jvm/analysis/BackendUtils.scala | 21 ++++++++++++++++++--- .../nsc/backend/jvm/opt/InstructionResultSize.scala | 8 ++------ 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/BackendUtils.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/BackendUtils.scala index aeb0f41237..efeaa54a7b 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/BackendUtils.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/BackendUtils.scala @@ -303,6 +303,8 @@ class BackendUtils[BT <: BTypes](val btypes: BT) { r } + val subroutineRetTargets = new mutable.Stack[AbstractInsnNode] + // for each instruction in the queue, contains the stack height at this instruction. // once an instruction has been treated, contains -1 to prevent re-enqueuing val stackHeights = new Array[Int](size) @@ -353,21 +355,34 @@ class BackendUtils[BT <: BTypes](val btypes: BT) { insn match { case j: JumpInsnNode => - enqInsn(j.label, heightAfter) - val opc = j.getOpcode - if (opc != GOTO) enqInsnIndex(insnIndex + 1, heightAfter) // jump is conditional, so the successor is also a possible control flow target + if (j.getOpcode == JSR) { + val jsrTargetHeight = heightAfter + 1 + if (jsrTargetHeight > maxStack) maxStack = jsrTargetHeight + subroutineRetTargets.push(j.getNext) + enqInsn(j.label, jsrTargetHeight) + } else { + enqInsn(j.label, heightAfter) + val opc = j.getOpcode + if (opc != GOTO) enqInsnIndex(insnIndex + 1, heightAfter) // jump is conditional, so the successor is also a possible control flow target + } + case l: LookupSwitchInsnNode => var j = 0 while (j < l.labels.size) { enqInsn(l.labels.get(j), heightAfter); j += 1 } enqInsn(l.dflt, heightAfter) + case t: TableSwitchInsnNode => var j = 0 while (j < t.labels.size) { enqInsn(t.labels.get(j), heightAfter); j += 1 } enqInsn(t.dflt, heightAfter) + + case r: VarInsnNode if r.getOpcode == RET => + enqInsn(subroutineRetTargets.pop(), heightAfter) + case _ => val opc = insn.getOpcode if (opc != ATHROW && !isReturn(insn)) diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/InstructionResultSize.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/InstructionResultSize.scala index 8d744f6d13..79e44a8503 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/InstructionResultSize.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/InstructionResultSize.scala @@ -33,14 +33,10 @@ object InstructionResultSize { case LDC => instruction.asInstanceOf[LdcInsnNode].cst match { - case _: java.lang.Integer | - _: java.lang.Float | - _: String | - _: Type | - _: Handle => 1 - case _: java.lang.Long | _: java.lang.Double => 2 + + case _ => 1 } case ILOAD | -- cgit v1.2.3 From 51e1489aaa6684298906b06ce0a6d95fa5b2fae5 Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Mon, 26 Oct 2015 19:47:43 +0100 Subject: Use a single Int for the prod / cons values of InstructionStackEffect The maximal number of produced values of any instruction is 6, so we can safely encode the number of consumed and produced values into a single Int as (prod << 3) + cons. --- .../nsc/backend/jvm/analysis/AliasingFrame.scala | 5 +- .../nsc/backend/jvm/analysis/BackendUtils.scala | 4 +- .../jvm/analysis/InstructionStackEffect.scala | 60 +++++++++------------- .../jvm/analysis/ProdConsAnalyzerImpl.scala | 8 +-- 4 files changed, 33 insertions(+), 44 deletions(-) diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala index 337f7787bd..596ee55290 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/AliasingFrame.scala @@ -92,10 +92,9 @@ class AliasingFrame[V <: Value](nLocals: Int, nStack: Int) extends Frame[V](nLoc def stackTop: Int = this.stackTop def peekStack(n: Int): V = this.peekStack(n) - // the val pattern `val (p, c) = f` still allocates a tuple (https://github.com/scala-opt/scala/issues/28) val prodCons = InstructionStackEffect.forAsmAnalysis(insn, this) // needs to be called before super.execute, see its doc - val consumed = prodCons._1 - val produced = prodCons._2 + val consumed = InstructionStackEffect.cons(prodCons) + val produced = InstructionStackEffect.prod(prodCons) super.execute(insn, interpreter) diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/BackendUtils.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/BackendUtils.scala index efeaa54a7b..b02bc7c96e 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/BackendUtils.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/BackendUtils.scala @@ -337,8 +337,8 @@ class BackendUtils[BT <: BTypes](val btypes: BT) { if (insn.getOpcode == -1) { // frames, labels, line numbers enqInsnIndex(insnIndex + 1, initHeight) } else { - val (cons, prod) = InstructionStackEffect.forAsmAnalysisConservative(insn) - val heightAfter = initHeight - cons + prod + val stackGrowth = InstructionStackEffect.maxStackGrowth(insn) + val heightAfter = initHeight + stackGrowth if (heightAfter > maxStack) maxStack = heightAfter // update maxLocals diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/InstructionStackEffect.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/InstructionStackEffect.scala index 1f81736a5d..4e81018451 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/InstructionStackEffect.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/InstructionStackEffect.scala @@ -10,59 +10,49 @@ import scala.tools.asm.tree.analysis.{Frame, Value} import opt.BytecodeUtils._ object InstructionStackEffect { - // x up to 10, which covers most cases and limits the cache. y doesn't go above 6 (see cases). - val maxCachedX = 10 - val maxCachedY = 6 - val xShift = 3 - private val cache = new Array[(Int, Int)]((maxCachedX << xShift) + maxCachedY + 1) - private def t(x: Int, y: Int): (Int, Int) = { - if (x > maxCachedX || y > maxCachedY) (x, y) - else { - val key = (x << xShift) + y - if (cache(key) != null) { - cache(key) - } else { - val r = (x, y) - cache(key) = r - r - } - } - } + val consShift = 3 + val prodMask = (1 << consShift) - 1 + + def cons(i: Int) = i >>> consShift + def prod(i: Int) = i & prodMask + + private def t(x: Int, y: Int): Int = (x << consShift) | y /** - * Returns a pair with the number of stack values consumed and produced by `insn`. The returned - * values are correct for use in asm's Analyzer framework. For example, a LLOAD instruction - * produces one stack value. See also doc in `analysis` package object. + * Returns the number of stack values consumed and produced by `insn`, encoded in a single `Int` + * (the `cons` / `prod` extract individual values). The returned values are correct for use in + * asm's Analyzer framework. For example, a LLOAD instruction produces one stack value. See also + * doc in `analysis` package object. * * This method requires the `frame` to be in the state **before** executing / interpreting the * `insn`. */ - def forAsmAnalysis[V <: Value](insn: AbstractInsnNode, frame: Frame[V]): (Int, Int) = computeConsProd(insn, frame = frame) + def forAsmAnalysis[V <: Value](insn: AbstractInsnNode, frame: Frame[V]): Int = computeConsProd(insn, frame = frame) /** - * Returns a pair with the number of stack values consumed and produced by `insn`. The returned - * values are usually the same as used in asm's Analyzer framework, but they may be larger. For + * Returns the maximal possible growth of the stack when executing `insn`. The returned value + * is usually the same as expected by asm's Analyzer framework, but it may be larger. For * example, consider a POP2 instruction: * - if two size-1 values are popped, then the asm Analyzer consumes two values * - if a size-2 value is popped, the asm Analyzer consumes only one stack slot (see doc in the - * `analysis` package object + * `analysis` package object) * * If a precise result is needed, invoke the `forAsmAnalysis` and provide a `frame` value that * allows looking up the sizes of values on the stack. - * - * When the precise stack effect is unknown, this method returns values that are safe for - * computing an upper bound on the stack size for running an Analyzer: (prod - cons) is the - * largest possible value for any input sizes. */ - def forAsmAnalysisConservative(insn: AbstractInsnNode): (Int, Int) = computeConsProd(insn, conservative = true) + def maxStackGrowth(insn: AbstractInsnNode): Int = { + val prodCons = computeConsProd(insn, conservative = true) + prod(prodCons) - cons(prodCons) + } /** - * Returns a pair with the number of stack values consumed and produced by `insn`. The returned - * values are correct for writing into a classfile (see doc on `analysis` package object). + * Returns the number of stack values consumed and produced by `insn`, encoded in a single `Int` + * (the `cons` / `prod` extract individual values). The returned values are correct for writing + * into a classfile (see doc on the `analysis` package object). */ - def forClassfile(insn: AbstractInsnNode): (Int, Int) = computeConsProd(insn, forClassfile = true) + def forClassfile(insn: AbstractInsnNode): Int = computeConsProd(insn, forClassfile = true) - private def invokeConsProd(methodDesc: String, insn: AbstractInsnNode, forClassfile: Boolean): (Int, Int) = { + private def invokeConsProd(methodDesc: String, insn: AbstractInsnNode, forClassfile: Boolean): Int = { val consumesReceiver = insn.getOpcode != INVOKESTATIC && insn.getOpcode != INVOKEDYNAMIC if (forClassfile) { val sizes = Type.getArgumentsAndReturnSizes(methodDesc) @@ -81,7 +71,7 @@ object InstructionStackEffect { d == "J" || d == "D" } - private def computeConsProd[V <: Value](insn: AbstractInsnNode, forClassfile: Boolean = false, conservative: Boolean = false, frame: Frame[V] = null): (Int, Int) = { + private def computeConsProd[V <: Value](insn: AbstractInsnNode, forClassfile: Boolean = false, conservative: Boolean = false, frame: Frame[V] = null): Int = { // not used if `forClassfile || conservative`: in these cases, `frame` is allowed to be `null` def peekStack(n: Int): V = frame.peekStack(n) diff --git a/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzerImpl.scala b/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzerImpl.scala index 33bbbe5728..c933341492 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzerImpl.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/analysis/ProdConsAnalyzerImpl.scala @@ -368,9 +368,9 @@ trait ProdConsAnalyzerImpl { Seq(insn.asInstanceOf[IincInsnNode].`var`) } else { val frame = frameAt(insn) - val stackEffect = InstructionStackEffect.forAsmAnalysis(insn, frame) + val prodCons = InstructionStackEffect.forAsmAnalysis(insn, frame) val stackSize = frame.getLocals + frame.getStackSize - (stackSize - stackEffect._1) until stackSize + (stackSize - InstructionStackEffect.cons(prodCons)) until stackSize } } @@ -387,10 +387,10 @@ trait ProdConsAnalyzerImpl { Seq(insn.asInstanceOf[IincInsnNode].`var`) } else { val frame = frameAt(insn) - val stackEffect = InstructionStackEffect.forAsmAnalysis(insn, frame) + val prodCons = InstructionStackEffect.forAsmAnalysis(insn, frame) val nextFrame = frameAt(insn.getNext) val stackSize = nextFrame.getLocals + nextFrame.getStackSize - (stackSize - stackEffect._2) until stackSize + (stackSize - InstructionStackEffect.prod(prodCons)) until stackSize } } -- cgit v1.2.3