diff options
author | Miguel Garcia <miguelalfredo.garcia@epfl.ch> | 2012-02-17 18:02:53 +0100 |
---|---|---|
committer | Miguel Garcia <miguelalfredo.garcia@epfl.ch> | 2012-02-17 18:05:02 +0100 |
commit | 6255c482572441e729a59e448adfa12d338752bc (patch) | |
tree | 4d3b9812b1a11735c17b09119e1fafe73d9cca44 /src/compiler | |
parent | 91148376049a152edec12348ff9b7e9e93e6ebe1 (diff) | |
download | scala-6255c482572441e729a59e448adfa12d338752bc.tar.gz scala-6255c482572441e729a59e448adfa12d338752bc.tar.bz2 scala-6255c482572441e729a59e448adfa12d338752bc.zip |
significantly faster Inliner, with extensive documentation.
The same inlining decisions are made as before, see -Ylog:inliner, with a focus on lines starting with "[log inliner] Inlining"
review by @VladUreche @dragos @paulp
Diffstat (limited to 'src/compiler')
3 files changed, 635 insertions, 336 deletions
diff --git a/src/compiler/scala/tools/nsc/backend/icode/analysis/DataFlowAnalysis.scala b/src/compiler/scala/tools/nsc/backend/icode/analysis/DataFlowAnalysis.scala index 60cb679782..9f43e1b84c 100644 --- a/src/compiler/scala/tools/nsc/backend/icode/analysis/DataFlowAnalysis.scala +++ b/src/compiler/scala/tools/nsc/backend/icode/analysis/DataFlowAnalysis.scala @@ -60,20 +60,17 @@ trait DataFlowAnalysis[L <: SemiLattice] { val output = f(point, in(point)) if ((lattice.bottom == out(point)) || output != out(point)) { -// Console.println("Output changed at " + point -// + " from: " + out(point) + " to: " + output -// + " for input: " + in(point) + " and they are different: " + (output != out(point))) + // Console.println("Output changed at " + point + // + " from: " + out(point) + " to: " + output + // + " for input: " + in(point) + " and they are different: " + (output != out(point))) out(point) = output val succs = point.successors succs foreach { p => - if (!worklist(p)) - worklist += p; - if (!in.isDefinedAt(p)) - assert(false, "Invalid successor for: " + point + " successor " + p + " does not exist") -// if (!p.exceptionHandlerHeader) { -// println("lubbing " + p.predecessors + " outs: " + p.predecessors.map(out.apply).mkString("\n", "\n", "")) - in(p) = lattice.lub(in(p) :: (p.predecessors map out.apply), p.exceptionHandlerStart) -// } + val updated = lattice.lub(in(p) :: (p.predecessors map out.apply), p.exceptionHandlerStart) + if(updated != in(p)) { + in(p) = updated + if (!worklist(p)) { worklist += p; } + } } } } diff --git a/src/compiler/scala/tools/nsc/backend/icode/analysis/TypeFlowAnalysis.scala b/src/compiler/scala/tools/nsc/backend/icode/analysis/TypeFlowAnalysis.scala index 6421d6c8ef..877c51ebc1 100644 --- a/src/compiler/scala/tools/nsc/backend/icode/analysis/TypeFlowAnalysis.scala +++ b/src/compiler/scala/tools/nsc/backend/icode/analysis/TypeFlowAnalysis.scala @@ -127,34 +127,6 @@ abstract class TypeFlowAnalysis { } } - /** reinitialize the analysis, keeping around solutions from a previous run. */ - def reinit(m: icodes.IMethod) { - if (this.method == null || this.method.symbol != m.symbol) - init(m) - else reinit { - m foreachBlock { b => - if (!in.contains(b)) { - for (p <- b.predecessors) { - if (out.isDefinedAt(p)) { - in(b) = out(p) - worklist += p - } - /* else - in(b) = typeFlowLattice.bottom - */ } - out(b) = typeFlowLattice.bottom - } - } - for (handler <- m.exh) { - val start = handler.startBlock - if (!in.contains(start)) { - worklist += start - in(start) = lattice.IState(in(start).vars, typeStackLattice.exceptionHandlerStack) - } - } - } - } - def this(m: icodes.IMethod) { this() init(m) @@ -162,7 +134,7 @@ abstract class TypeFlowAnalysis { def run = { timer.start -// icodes.lubs0 = 0 + // icodes.lubs0 = 0 forwardAnalysis(blockTransfer) val t = timer.stop if (settings.debug.value) { @@ -170,216 +142,35 @@ abstract class TypeFlowAnalysis { assert(visited.contains(b), "Block " + b + " in " + this.method + " has input equal to bottom -- not visited? .." + visited)); } -// log("" + method.symbol.fullName + " [" + method.code.blocks.size + " blocks] " -// + "\n\t" + iterations + " iterations: " + t + " ms." -// + "\n\tlubs: " + typeFlowLattice.lubs + " out of which " + icodes.lubs0 + " typer lubs") + // log("" + method.symbol.fullName + " [" + method.code.blocks.size + " blocks] " + // + "\n\t" + iterations + " iterations: " + t + " ms." + // + "\n\tlubs: " + typeFlowLattice.lubs + " out of which " + icodes.lubs0 + " typer lubs") } def blockTransfer(b: BasicBlock, in: lattice.Elem): lattice.Elem = { - b.iterator.foldLeft(in)(interpret) - } - /** The flow function of a given basic block. */ - /* var flowFun: immutable.Map[BasicBlock, TransferFunction] = new immutable.HashMap */ - - /** Fill flowFun with a transfer function per basic block. */ -/* - private def buildFlowFunctions(blocks: List[BasicBlock]) { - def transfer(b: BasicBlock): TransferFunction = { - var gens: List[Gen] = Nil - var consumed: Int = 0 - val stack = new SimulatedStack - - for (instr <- b) instr match { - case THIS(clasz) => - stack push toTypeKind(clasz.tpe) - - case CONSTANT(const) => - stack push toTypeKind(const.tpe) - - case LOAD_ARRAY_ITEM(kind) => - stack.pop2 - stack.push(kind) - - case LOAD_LOCAL(local) => - val t = bindings(local) - stack push (if (t == typeLattice.bottom) local.kind else t) - - case LOAD_FIELD(field, isStatic) => - if (!isStatic) - stack.pop - stack push toTypeKind(field.tpe) - - case LOAD_MODULE(module) => - stack push toTypeKind(module.tpe) - - case STORE_ARRAY_ITEM(kind) => - stack.pop3 - - case STORE_LOCAL(local) => - val t = stack.pop - bindings += (local -> t) - - case STORE_THIS(_) => - stack.pop - - case STORE_FIELD(field, isStatic) => - if (isStatic) - stack.pop - else - stack.pop2 - - case CALL_PRIMITIVE(primitive) => - primitive match { - case Negation(kind) => - stack.pop; stack.push(kind) - case Test(_, kind, zero) => - stack.pop - if (!zero) stack.pop - stack push BOOL; - case Comparison(_, _) => - stack.pop2 - stack push INT - - case Arithmetic(op, kind) => - stack.pop - if (op != NOT) - stack.pop - val k = kind match { - case BYTE | SHORT | CHAR => INT - case _ => kind - } - stack push k - - case Logical(op, kind) => - stack.pop2 - stack push kind - - case Shift(op, kind) => - stack.pop2 - stack push kind - - case Conversion(src, dst) => - stack.pop - stack push dst - - case ArrayLength(kind) => - stack.pop - stack push INT - - case StartConcat => - stack.push(ConcatClass) - - case EndConcat => - stack.pop - stack.push(STRING) - - case StringConcat(el) => - stack.pop2 - stack push ConcatClass - } - - case CALL_METHOD(method, style) => style match { - case Dynamic => - stack.pop(1 + method.info.paramTypes.length) - stack.push(toTypeKind(method.info.resultType)) - - case Static(onInstance) => - if (onInstance) { - stack.pop(1 + method.info.paramTypes.length) - if (!method.isConstructor) - stack.push(toTypeKind(method.info.resultType)); - } else { - stack.pop(method.info.paramTypes.length) - stack.push(toTypeKind(method.info.resultType)) - } - - case SuperCall(mix) => - stack.pop(1 + method.info.paramTypes.length) - stack.push(toTypeKind(method.info.resultType)) - } - - case BOX(kind) => - stack.pop - stack.push(BOXED(kind)) - - case UNBOX(kind) => - stack.pop - stack.push(kind) - - case NEW(kind) => - stack.push(kind) - - case CREATE_ARRAY(elem, dims) => - stack.pop(dims) - stack.push(ARRAY(elem)) - - case IS_INSTANCE(tpe) => - stack.pop - stack.push(BOOL) - - case CHECK_CAST(tpe) => - stack.pop - stack.push(tpe) - - case SWITCH(tags, labels) => - stack.pop - - case JUMP(whereto) => - () - - case CJUMP(success, failure, cond, kind) => - stack.pop2 - - case CZJUMP(success, failure, cond, kind) => - stack.pop - - case RETURN(kind) => - if (kind != UNIT) - stack.pop; - - case THROW() => - stack.pop - - case DROP(kind) => - stack.pop - - case DUP(kind) => - stack.push(stack.head) - - case MONITOR_ENTER() => - stack.pop - - case MONITOR_EXIT() => - stack.pop - - case SCOPE_ENTER(_) | SCOPE_EXIT(_) => - () - - case LOAD_EXCEPTION(_) => - stack.pop(stack.length) - stack.push(typeLattice.Object) - - case _ => - dumpClassesAndAbort("Unknown instruction: " + i) - } - - new TransferFunction(consumed, gens) - } - - for (b <- blocks) { - flowFun = flowFun + (b -> transfer(b)) + var result = lattice.IState(new VarBinding(in.vars), new TypeStack(in.stack)) + var instrs = b.toList + while(!instrs.isEmpty) { + val i = instrs.head + result = mutatingInterpret(result, i) + instrs = instrs.tail } + result } -*/ + /** Abstract interpretation for one instruction. */ def interpret(in: typeFlowLattice.Elem, i: Instruction): typeFlowLattice.Elem = { val out = lattice.IState(new VarBinding(in.vars), new TypeStack(in.stack)) + mutatingInterpret(out, i) + } + + def mutatingInterpret(out: typeFlowLattice.Elem, i: Instruction): typeFlowLattice.Elem = { val bindings = out.vars val stack = out.stack if (settings.debug.value) { -// Console.println("[before] Stack: " + stack); -// Console.println(i); + // Console.println("[before] Stack: " + stack); + // Console.println(i); } i match { @@ -619,11 +410,292 @@ abstract class TypeFlowAnalysis { } } + case class CallsiteInfo(bb: icodes.BasicBlock, receiver: Symbol, stackLength: Int, concreteMethod: Symbol) + + /** + + A full type-flow analysis on a method computes in- and out-flows for each basic block (that's what MethodTFA does). + + For the purposes of Inliner, doing so guarantees that an abstract typestack-slot is available by the time an inlining candidate (a CALL_METHOD instruction) is visited. + This subclass (MTFAGrowable) of MethodTFA also aims at performing such analysis on CALL_METHOD instructions, with some differences: + + (a) early screening is performed while the type-flow is being computed (in an override of `blockTransfer`) by testing a subset of the conditions that Inliner checks later. + The reasoning here is: if the early check fails at some iteration, there's no chance a follow-up iteration (with a yet more lub-ed typestack-slot) will succeed. + Failure is sufficient to remove that particular CALL_METHOD from the typeflow's `remainingCALLs`. + A forward note: in case inlining occurs at some basic block B, all blocks reachable from B get their CALL_METHOD instructions considered again as candidates + (because of the more precise types that -- perhaps -- can be computed). + + (b) in case the early check does not fail, no conclusive decision can be made, thus the CALL_METHOD stays `isOnwatchlist`. + + In other words, `remainingCALLs` tracks those callsites that still remain as candidates for inlining, so that Inliner can focus on those. + `remainingCALLs` also caches info about the typestack just before the callsite, so as to spare computing them again at inlining time. + + Besides caching, a further optimization involves skipping those basic blocks whose in-flow and out-flow isn't needed anyway (as explained next). + A basic block lacking a callsite in `remainingCALLs`, when visisted by the standard algorithm, won't cause any inlining. + But as we know from the way type-flows are computed, computing the in- and out-flow for a basic block relies in general on those of other basic blocks. + In detail, we want to focus on that sub-graph of the CFG such that control flow may reach a remaining candidate callsite. + Those basic blocks not in that subgraph can be skipped altogether. That's why: + - `forwardAnalysis()` in `MTFAGrowable` now checks for inclusion of a basic block in `relevantBBs` + - same check is performed before adding a block to the worklist, and as part of choosing successors. + The bookkeeping supporting on-the-fly pruning of irrelevant blocks requires overridding most methods of the dataflow-analysis. + + The rest of the story takes place in Inliner, which does not visit all of the method's basic blocks but only on those represented in `remainingCALLs`. + + @author Miguel Garcia, http://lampwww.epfl.ch/~magarcia/ScalaCompilerCornerReloaded/ + + */ class MTFAGrowable extends MethodTFA { import icodes._ - /** discards what must be discarded, blanks what needs to be blanked out, and keeps the rest. */ + val remainingCALLs = mutable.Map.empty[opcodes.CALL_METHOD, CallsiteInfo] + + val preCandidates = mutable.Set.empty[BasicBlock] + + var callerLin: Traversable[BasicBlock] = null + + override def run { + + timer.start + forwardAnalysis(blockTransfer) + val t = timer.stop + + /* Now that `forwardAnalysis(blockTransfer)` has finished, all inlining candidates can be found in `remainingCALLs`, + whose keys are callsites and whose values are pieces of information about the typestack just before the callsite in question. + In order to keep `analyzeMethod()` simple, we collect in `preCandidates` those basic blocks containing at least one candidate. */ + preCandidates.clear() + for(rc <- remainingCALLs) { + preCandidates += rc._2.bb + } + + if (settings.debug.value) { + for(b <- callerLin; if (b != method.startBlock) && preCandidates(b)) { + assert(visited.contains(b), + "Block " + b + " in " + this.method + " has input equal to bottom -- not visited? .." + visited) + } + } + + } + + var shrinkedWatchlist = false + + /* + This is the method where information cached elsewhere is put to use. References are given those other places that populate those caches. + + The goal is avoiding computing type-flows for blocks we don't need (ie blocks not tracked in `relevantBBs`). The method used to add to `relevantBBs` is `putOnRadar`. + + Moreover, it's often the case that the last CALL_METHOD of interest ("of interest" equates to "being tracked in `isOnWatchlist`) isn't the last instruction on the block. + There are cases where the typeflows computed past this `lastInstruction` are needed, and cases when they aren't. + The reasoning behind this decsision is described in `populatePerimeter()`. All `blockTransfer()` needs to do (in order to know at which instruction it can stop) + is querying `isOnPerimeter`. + + Upon visiting a CALL_METHOD that's an inlining candidate, the relevant pieces of information about the pre-instruction typestack are collected for future use. + That is, unless the candidacy test fails. The reasoning here is: if such early check fails at some iteration, there's no chance a follow-up iteration + (with a yet more lub-ed typestack-slot) will succeed. In case of failure we can safely remove the CALL_METHOD from both `isOnWatchlist` and `remainingCALLs`. + + */ + override def blockTransfer(b: BasicBlock, in: lattice.Elem): lattice.Elem = { + var result = lattice.IState(new VarBinding(in.vars), new TypeStack(in.stack)) + + val stopAt = if(isOnPerimeter(b)) lastInstruction(b) else null; + var isPastLast = false + + var instrs = b.toList + while(!isPastLast && !instrs.isEmpty) { + val i = instrs.head + + if(isOnWatchlist(i)) { + val cm = i.asInstanceOf[opcodes.CALL_METHOD] + val msym = cm.method + val paramsLength = msym.info.paramTypes.size + val receiver = result.stack.types.drop(paramsLength).head match { + case REFERENCE(s) => s + case _ => NoSymbol // e.g. the scrutinee is BOX(s) or ARRAY + } + val concreteMethod = inliner.lookupImplFor(msym, receiver) + val isCandidate = { + ( inliner.isClosureClass(receiver) || concreteMethod.isEffectivelyFinal || receiver.isEffectivelyFinal ) && + !blackballed(concreteMethod) + } + if(isCandidate) { + remainingCALLs += Pair(cm, CallsiteInfo(b, receiver, result.stack.length, concreteMethod)) + } else { + remainingCALLs.remove(cm) + isOnWatchlist.remove(cm) + shrinkedWatchlist = true + } + } + + isPastLast = (i eq stopAt) + + if(!isPastLast) { + result = mutatingInterpret(result, i) + instrs = instrs.tail + } + } + + result + } // end of method blockTransfer + + val isOnWatchlist = mutable.Set.empty[Instruction] + + /* Each time CallerCalleeInfo.isSafeToInline determines a concrete callee is unsafe to inline in the current caller, + the fact is recorded in this TFA instance for the purpose of avoiding devoting processing to that callsite next time. + The condition of "being unsafe to inline in the current caller" sticks across inlinings and TFA re-inits + because it depends on the instructions of the callee, which stay unchanged during the course of `analyzeInc(caller)` + (with the caveat of the side-effecting `makePublic` in `helperIsSafeToInline`).*/ + val knownUnsafe = mutable.Set.empty[Symbol] + val knownSafe = mutable.Set.empty[Symbol] + val knownNever = mutable.Set.empty[Symbol] // `knownNever` needs be cleared only at the very end of the inlining phase (unlike `knownUnsafe` and `knownSafe`) + @inline final def blackballed(msym: Symbol): Boolean = { knownUnsafe(msym) || knownNever(msym) } + + val relevantBBs = mutable.Set.empty[BasicBlock] + + private def isPreCandidate(cm: opcodes.CALL_METHOD): Boolean = { + val msym = cm.method + val style = cm.style + // Dynamic == normal invocations + // Static(true) == calls to private members + !msym.isConstructor && !blackballed(msym) && + (style.isDynamic || (style.hasInstance && style.isStatic)) + // && !(msym hasAnnotation definitions.ScalaNoInlineClass) + } + + override def init(m: icodes.IMethod) { + super.init(m) + remainingCALLs.clear() + knownUnsafe.clear() + knownSafe.clear() + // initially populate the watchlist with all callsites standing a chance of being inlined + isOnWatchlist.clear() + relevantBBs.clear() + /* TODO Do we want to perform inlining in non-finally exception handlers? + * Seems counterproductive (the larger the method the less likely it will be JITed. + * It's not that putting on radar only `linearizer linearizeAt (m, m.startBlock)` makes for much shorter inlining times (a minor speedup nonetheless) + * but the effect on method size could be explored. */ + putOnRadar(m.linearizedBlocks(linearizer)) + populatePerimeter() + assert(relevantBBs.isEmpty || relevantBBs.contains(m.startBlock), "you gave me dead code") + } + + def conclusives(b: BasicBlock): List[opcodes.CALL_METHOD] = { + knownBeforehand(b) filter { cm => inliner.isMonadicMethod(cm.method) || inliner.hasInline(cm.method) } + } + + def knownBeforehand(b: BasicBlock): List[opcodes.CALL_METHOD] = { + b.toList collect { case c : opcodes.CALL_METHOD => c } filter { cm => isPreCandidate(cm) && isReceiverKnown(cm) } + } + + private def isReceiverKnown(cm: opcodes.CALL_METHOD): Boolean = { + cm.method.isEffectivelyFinal && cm.method.owner.isEffectivelyFinal + } + + private def putOnRadar(blocks: Traversable[BasicBlock]) { + for(bb <- blocks) { + val preCands = bb.toList collect { + case cm : opcodes.CALL_METHOD + if isPreCandidate(cm) /* && !isReceiverKnown(cm) */ + => cm + } + isOnWatchlist ++= preCands + } + relevantBBs ++= blocks + } + + /* the argument is also included in the result */ + private def transitivePreds(b: BasicBlock): Set[BasicBlock] = { transitivePreds(List(b)) } + + /* those BBs in the argument are also included in the result */ + private def transitivePreds(starters: Traversable[BasicBlock]): Set[BasicBlock] = { + val result = mutable.Set.empty[BasicBlock] + var toVisit: List[BasicBlock] = starters.toList.distinct + while(toVisit.nonEmpty) { + val h = toVisit.head + toVisit = toVisit.tail + result += h + for(p <- h.predecessors; if !result(p) && !toVisit.contains(p)) { toVisit = p :: toVisit } + } + result.toSet + } + + /* those BBs in the argument are also included in the result */ + private def transitiveSuccs(starters: Traversable[BasicBlock]): Set[BasicBlock] = { + val result = mutable.Set.empty[BasicBlock] + var toVisit: List[BasicBlock] = starters.toList.distinct + while(toVisit.nonEmpty) { + val h = toVisit.head + toVisit = toVisit.tail + result += h + for(p <- h.successors; if !result(p) && !toVisit.contains(p)) { toVisit = p :: toVisit } + } + result.toSet + } + + /* A basic block B is "on the perimeter" of the current control-flow subgraph if none of its successors belongs to that subgraph. + * In that case, for the purposes of inlining, we're interested in the typestack right before the last inline candidate in B, not in those afterwards. + * In particular we can do without computing the outflow at B. */ + private def populatePerimeter() { + isOnPerimeter.clear() + var done = true + do { + val (frontier, toPrune) = (relevantBBs filter hasNoRelevantSuccs) partition isWatching + isOnPerimeter ++= frontier + relevantBBs --= toPrune + done = toPrune.isEmpty + } while(!done) + + lastInstruction.clear() + for(b <- isOnPerimeter; val lastIns = b.toList.reverse find isOnWatchlist) { + lastInstruction += (b -> lastIns.get.asInstanceOf[opcodes.CALL_METHOD]) + } + + // assertion: "no relevant block can have a predecessor that is on perimeter" + assert((for (b <- relevantBBs; if transitivePreds(b.predecessors) exists isOnPerimeter) yield b).isEmpty) + } + + private val isOnPerimeter = mutable.Set.empty[BasicBlock] + private val lastInstruction = mutable.Map.empty[BasicBlock, opcodes.CALL_METHOD] + + def hasNoRelevantSuccs(x: BasicBlock): Boolean = { !(x.successors exists relevantBBs) } + + def isWatching(x: BasicBlock): Boolean = (x.toList exists isOnWatchlist) + + + + + /** + + This method is invoked after one or more inlinings have been performed in basic blocks whose in-flow is non-bottom (this makes a difference later). + What we know about those inlinings is given by: + + - `staleOut`: These are the blocks where a callsite was inlined. + For each callsite, all instructions in that block before the callsite were left in the block, and the rest moved to an `afterBlock`. + The out-flow of these basic blocks is thus in general stale, that's why we'll add them to the TFA worklist. + + - `inlined` : These blocks were spliced into the method's CFG as part of inlining. Being new blocks, they haven't been visited yet by the typeflow analysis. + + - `staleIn` : These blocks are what `doInline()` calls `afterBlock`s, ie the new home for instructions that previously appearead + after a callsite in a `staleOut` block. + + Based on the above information, we have to bring up-to-date the caches that `forwardAnalysis` and `blockTransfer` use to skip blocks and instructions. + Those caches are `relevantBBs` and `isOnPerimeter` (for blocks) and `isOnWatchlist` and `lastInstruction` (for CALL_METHODs). + Please notice that all `inlined` and `staleIn` blocks are reachable from `staleOut` blocks. + + The update takes place in two steps: + + (1) `staleOut foreach { so => putOnRadar(linearizer linearizeAt (m, so)) }` + This results in initial populations for `relevantBBs` and `isOnWatchlist`. + Because of the way `isPreCandidate` reuses previous decision-outcomes that are still valid, + this already prunes some candidates standing no chance of being inlined. + + (2) `populatePerimeter()` + Based on the CFG-subgraph determined in (1) as reflected in `relevantBBs`, + this method detects some blocks whose typeflows aren't needed past a certain CALL_METHOD + (not needed because none of its successors is relevant for the purposes of inlining, see `hasNoRelevantSuccs`). + The blocks thus chosen are said to be "on the perimeter" of the CFG-subgraph. + For each of them, its `lastInstruction` (after which no more typeflows are needed) is found. + + */ def reinit(m: icodes.IMethod, staleOut: List[BasicBlock], inlined: collection.Set[BasicBlock], staleIn: collection.Set[BasicBlock]) { if (this.method == null || this.method.symbol != m.symbol) { init(m) @@ -633,31 +705,102 @@ abstract class TypeFlowAnalysis { return; } - reinit { - // asserts conveying an idea what CFG shapes arrive here. - // staleIn foreach (p => assert( !in.isDefinedAt(p), p)) - // staleIn foreach (p => assert(!out.isDefinedAt(p), p)) - // inlined foreach (p => assert( !in.isDefinedAt(p), p)) - // inlined foreach (p => assert(!out.isDefinedAt(p), p)) - // inlined foreach (p => assert(!p.successors.isEmpty || p.lastInstruction.isInstanceOf[icodes.opcodes.THROW], p)) - // staleOut foreach (p => assert( in.isDefinedAt(p), p)) - - // never rewrite in(m.startBlock) - staleOut foreach { b => - if(!inlined.contains(b)) { worklist += b } - out(b) = typeFlowLattice.bottom - } - // nothing else is added to the worklist, bb's reachable via succs will be tfa'ed - blankOut(inlined) - blankOut(staleIn) - // no need to add startBlocks from m.exh + worklist.clear // calling reinit(f: => Unit) would also clear visited, thus forgetting about blocks visited before reinit. + + // asserts conveying an idea what CFG shapes arrive here: + // staleIn foreach (p => assert( !in.isDefinedAt(p), p)) + // staleIn foreach (p => assert(!out.isDefinedAt(p), p)) + // inlined foreach (p => assert( !in.isDefinedAt(p), p)) + // inlined foreach (p => assert(!out.isDefinedAt(p), p)) + // inlined foreach (p => assert(!p.successors.isEmpty || p.lastInstruction.isInstanceOf[icodes.opcodes.THROW], p)) + // staleOut foreach (p => assert( in.isDefinedAt(p), p)) + + // remainingCALLs.clear() + isOnWatchlist.clear() + relevantBBs.clear() + + // never rewrite in(m.startBlock) + staleOut foreach { b => + enqueue(b) + out(b) = typeFlowLattice.bottom } + // nothing else is added to the worklist, bb's reachable via succs will be tfa'ed + blankOut(inlined) + blankOut(staleIn) + // no need to add startBlocks from m.exh + + staleOut foreach { so => putOnRadar(linearizer linearizeAt (m, so)) } + populatePerimeter() + + } // end of method reinit + + /* this is not a general purpose method to add to the worklist, + * because the assert is expected to hold only when called from MTFAGrowable.reinit() */ + private def enqueue(b: BasicBlock) { + assert(in(b) ne typeFlowLattice.bottom) + if(!worklist.contains(b)) { worklist += b } + } + + /* this is not a general purpose method to add to the worklist, + * because the assert is expected to hold only when called from MTFAGrowable.reinit() */ + private def enqueue(bs: Traversable[BasicBlock]) { + bs foreach enqueue } private def blankOut(blocks: collection.Set[BasicBlock]) { blocks foreach { b => - in(b) = typeFlowLattice.bottom - out(b) = typeFlowLattice.bottom + in(b) = typeFlowLattice.bottom + out(b) = typeFlowLattice.bottom + } + } + + /* + This is basically the plain-old forward-analysis part of a dataflow algorithm, + adapted to skip non-relevant blocks (as determined by `reinit()` via `populatePerimeter()`). + + The adaptations are: + + - only relevant blocks dequeued from the worklist move on to have the transfer function applied + + - `visited` now means the transfer function was applied to the block, + but please notice that this does not imply anymore its out-flow to be different from bottom, + because a block on the perimeter will have per-instruction typeflows computed only up to its `lastInstruction`. + In case you need to know whether a visted block `v` has been "fully visited", evaluate `out(v) ne typeflowLattice.bottom` + + - given that the transfer function may remove callsite-candidates from the watchlist (thus, they are not candidates anymore) + there's an opportunity to detect whether a previously relevant block has been left without candidates. + That's what `shrinkedWatchlist` detects. Provided the block was on the perimeter, we know we can skip it from now now, + and we can also constrain the CFG-subgraph by finding a new perimeter (thus the invocation to `populatePerimeter()`). + */ + override def forwardAnalysis(f: (P, lattice.Elem) => lattice.Elem): Unit = { + while (!worklist.isEmpty && relevantBBs.nonEmpty) { + if (stat) iterations += 1 + val point = worklist.iterator.next; worklist -= point; + if(relevantBBs(point)) { + shrinkedWatchlist = false + val output = f(point, in(point)) + visited += point; + if(isOnPerimeter(point)) { + if(shrinkedWatchlist && !isWatching(point)) { + relevantBBs -= point; + populatePerimeter() + } + } else { + val propagate = ((lattice.bottom == out(point)) || output != out(point)) + if (propagate) { + out(point) = output + val succs = point.successors filter relevantBBs + succs foreach { p => + assert((p.predecessors filter isOnPerimeter).isEmpty) + val updated = lattice.lub(List(output, in(p)), p.exceptionHandlerStart) + if(updated != in(p)) { + in(p) = updated + enqueue(p) + } + } + } + } + } } } diff --git a/src/compiler/scala/tools/nsc/backend/opt/Inliners.scala b/src/compiler/scala/tools/nsc/backend/opt/Inliners.scala index 66f802f74f..3d7f08cebe 100644 --- a/src/compiler/scala/tools/nsc/backend/opt/Inliners.scala +++ b/src/compiler/scala/tools/nsc/backend/opt/Inliners.scala @@ -38,6 +38,33 @@ abstract class Inliners extends SubComponent { res } + /** Look up implementation of method 'sym in 'clazz'. + */ + def lookupImplFor(sym: Symbol, clazz: Symbol): Symbol = { + // TODO: verify that clazz.superClass is equivalent here to clazz.tpe.parents(0).typeSymbol (.tpe vs .info) + def needsLookup = ( + (clazz != NoSymbol) + && (clazz != sym.owner) + && !sym.isEffectivelyFinal + && clazz.isEffectivelyFinal + ) + def lookup(clazz: Symbol): Symbol = { + // println("\t\tlooking up " + meth + " in " + clazz.fullName + " meth.owner = " + meth.owner) + if (sym.owner == clazz || isBottomType(clazz)) sym + else sym.overridingSymbol(clazz) match { + case NoSymbol => if (sym.owner.isTrait) sym else lookup(clazz.superClass) + case imp => imp + } + } + if (needsLookup) { + val concreteMethod = lookup(clazz) + debuglog("\tlooked up method: " + concreteMethod.fullName) + + concreteMethod + } + else sym + } + /* A warning threshold */ private final val MAX_INLINE_MILLIS = 2000 @@ -67,8 +94,7 @@ abstract class Inliners extends SubComponent { try { super.run() } finally { - inliner.NonPublicRefs.usesNonPublics.clear() - inliner.recentTFAs.clear + inliner.clearCaches() } } } @@ -80,6 +106,21 @@ abstract class Inliners extends SubComponent { def isClosureClass(cls: Symbol): Boolean = cls.isFinal && cls.isSynthetic && !cls.isModuleClass && cls.isAnonymousFunction + /* + TODO now that Inliner runs faster we could consider additional "monadic methods" (in the limit, all those taking a closure as last arg) + Any "monadic method" occurring in a given caller C that is not `isMonadicMethod()` will prevent CloseElim from eliminating + any anonymous-closure-class any whose instances are given as argument to C invocations. + */ + def isMonadicMethod(sym: Symbol) = { + nme.unspecializedName(sym.name) match { + case nme.foreach | nme.filter | nme.withFilter | nme.map | nme.flatMap => true + case _ => false + } + } + + def hasInline(sym: Symbol) = sym hasAnnotation ScalaInlineClass + def hasNoInline(sym: Symbol) = sym hasAnnotation ScalaNoInlineClass + /** * Simple inliner. */ @@ -92,9 +133,6 @@ abstract class Inliners extends SubComponent { } import NonPublicRefs._ - private def hasInline(sym: Symbol) = sym hasAnnotation ScalaInlineClass - private def hasNoInline(sym: Symbol) = sym hasAnnotation ScalaNoInlineClass - /** The current iclass */ private var currentIClazz: IClass = _ private def warn(pos: Position, msg: String) = currentIClazz.cunit.warning(pos, msg) @@ -121,6 +159,21 @@ abstract class Inliners extends SubComponent { (hasRETURN, a) } + def clearCaches() { + // methods + NonPublicRefs.usesNonPublics.clear() + recentTFAs.clear + tfa.knownUnsafe.clear() + tfa.knownSafe.clear() + tfa.knownNever.clear() + // basic blocks + tfa.preCandidates.clear() + tfa.relevantBBs.clear() + // callsites + tfa.remainingCALLs.clear() + tfa.isOnWatchlist.clear() + } + def analyzeClass(cls: IClass): Unit = if (settings.inline.value) { debuglog("Analyzing " + cls) @@ -142,7 +195,38 @@ abstract class Inliners extends SubComponent { val splicedBlocks = mutable.Set.empty[BasicBlock] val staleIn = mutable.Set.empty[BasicBlock] + /** + * A transformation local to the body of the argument. + * An linining decision consists in replacing a callsite with the body of the callee. + * Please notice that, because `analyzeMethod()` itself may modify a method body, + * the particular callee bodies that end up being inlined depend on the particular order in which methods are visited + * (no topological ordering over the call-graph is attempted). + * + * Making an inlining decision requires type-flow information for both caller and callee. + * Regarding the caller, such information is needed only for basic blocks containing inlining candidates + * (and their transitive predecessors). This observation leads to using a custom type-flow analysis (MTFAGrowable) + * that can be re-inited, i.e. that reuses lattice elements (type-flow information) computed in a previous iteration + * as starting point for faster convergence in a new iteration. + * + * The mechanics of inlining are iterative for a given invocation of `analyzeMethod(m)`, + * thus considering the basic blocks that successful inlining added in a previous iteration: + * + * (1) before the iterations proper start, so-called preinlining is performed. + * Those callsites whose (receiver, concreteMethod) are both known statically + * can be analyzed for inlining before computing a type-flow. Details in `preInline()` + * + * (2) the first iteration computes type-flow information for basic blocks containing inlining candidates + * (and their transitive predecessors), so called `relevantBBs`. + * The ensuing analysis of each candidate (performed by `analyzeInc()`) + * may result in a CFG isomorphic to that of the callee being inserted where the callsite was + * (i.e. a CALL_METHOD instruction is replaced with a single-entry single-exit CFG, which we call "successful inlining"). + * + * (3) following iterations have their relevant basic blocks updated to focus + * on the inlined basic blocks and their successors only. Details in `MTFAGrowable.reinit()` + * */ def analyzeMethod(m: IMethod): Unit = { + // m.normalize + var sizeBeforeInlining = m.code.blockCount var instrBeforeInlining = m.code.instructionCount var retry = false @@ -154,17 +238,53 @@ abstract class Inliners extends SubComponent { val inlinedMethodCount = mutable.HashMap.empty[Symbol, Int] withDefaultValue 0 val caller = new IMethodInfo(m) - var info: tfa.lattice.Elem = null - def analyzeInc(msym: Symbol, i: Instruction, bb: BasicBlock): Boolean = { - var inlined = false - def paramTypes = msym.info.paramTypes - val receiver = (info.stack.types drop paramTypes.length) match { - case Nil => log("analyzeInc(" + msym + "), no type on the stack!") ; NoSymbol - case REFERENCE(s) :: _ => s - case _ => NoSymbol + def preInline(isFirstRound: Boolean): Int = { + val inputBlocks = caller.m.linearizedBlocks() + val callsites: Function1[BasicBlock, List[opcodes.CALL_METHOD]] = { + if(isFirstRound) tfa.conclusives else tfa.knownBeforehand } - val concreteMethod = lookupImplFor(msym, receiver) + inlineWithoutTFA(inputBlocks, callsites) + } + + /** + * Inline straightforward callsites (those that can be inlined without a TFA). + * + * To perform inlining, all we need to know is listed as formal params in `analyzeInc()`: + * - callsite and block containing it + * - actual (ie runtime) class of the receiver + * - actual (ie runtime) method being invoked + * - stack length just before the callsite (to check whether enough arguments have been pushed). + * The assert below lists the conditions under which "no TFA is needed" + * (the statically known receiver and method are both final, thus, at runtime they can't be any others than those). + * + */ + def inlineWithoutTFA(inputBlocks: Traversable[BasicBlock], callsites: Function1[BasicBlock, List[opcodes.CALL_METHOD]]): Int = { + var inlineCount = 0 + import scala.util.control.Breaks._ + for(x <- inputBlocks; val easyCake = callsites(x); if easyCake.nonEmpty) { + breakable { + for(ocm <- easyCake) { + assert(ocm.method.isEffectivelyFinal && ocm.method.owner.isEffectivelyFinal) + if(analyzeInc(ocm, x, ocm.method.owner, -1, ocm.method)) { + inlineCount += 1 + break + } + } + } + } + + inlineCount + } + + /** + Decides whether it's feasible and desirable to inline the body of the method given by `concreteMethod` + at the program point given by `i` (a callsite). The boolean result indicates whether inlining was performed. + + */ + def analyzeInc(i: CALL_METHOD, bb: BasicBlock, receiver: Symbol, stackLength: Int, concreteMethod: Symbol): Boolean = { + var inlined = false + val msym = i.method def warnNoInline(reason: String) = { if (hasInline(msym) && !caller.isBridge) @@ -209,7 +329,7 @@ abstract class Inliners extends SubComponent { val inc = new IMethodInfo(callee) val pair = new CallerCalleeInfo(caller, inc, fresh, inlinedMethodCount) - if (pair isStampedForInlining info.stack) { + if (pair isStampedForInlining stackLength) { retry = true inlined = true if (isCountable) @@ -228,9 +348,9 @@ abstract class Inliners extends SubComponent { } else { if (settings.debug.value) - pair logFailure info.stack + pair logFailure stackLength - warnNoInline(pair failureReason info.stack) + warnNoInline(pair failureReason stackLength) } case None => warnNoInline("bytecode was not available") @@ -241,38 +361,96 @@ abstract class Inliners extends SubComponent { if (!isAvailable) "bytecode was not available" else "it can be overridden" ) + inlined } - import scala.util.control.Breaks._ + /* Pre-inlining consists in invoking the usual inlining subroutine with (receiver class, concrete method) pairs as input + * where both method and receiver are final, which implies that the receiver computed via TFA will always match `concreteMethod.owner`. + * + * As with any invocation of `analyzeInc()` the inlining outcome is based on heuristics which favor inlining an isMonadicMethod before other methods. + * That's why preInline() is invoked twice: any inlinings downplayed by the heuristics during the first round get an opportunity to rank higher during the second. + * + * As a whole, both `preInline()` invocations amount to priming the inlining process, + * so that the first TFA run afterwards is able to gain more information as compared to a cold-start. + */ + val totalPreInlines = { + val firstRound = preInline(true) + if(firstRound == 0) 0 else (firstRound + preInline(false)) + } + staleOut.clear() + splicedBlocks.clear() + staleIn.clear() + do { retry = false log("Analyzing " + m + " count " + count + " with " + caller.length + " blocks") + + /* it's important not to inline in unreachable basic blocks. linearizedBlocks() returns only reachable ones. */ + tfa.callerLin = caller.m.linearizedBlocks() + /* TODO Do we want to perform inlining in non-finally exception handlers? + * Seems counterproductive (the larger the method the less likely it will be JITed). + * The alternative above would be `linearizer.linearizeAt(caller.m, caller.m.startBlock)`. + * See also comment on the same topic in TypeFlowAnalysis. */ + tfa.reinit(m, staleOut.toList, splicedBlocks, staleIn) tfa.run staleOut.clear() splicedBlocks.clear() staleIn.clear() - caller.m.linearizedBlocks() foreach { bb => - info = tfa in bb - + import scala.util.control.Breaks._ + for(bb <- tfa.callerLin; if tfa.preCandidates(bb)) { + val cms = bb.toList collect { case cm : CALL_METHOD => cm } breakable { - for (i <- bb) { - i match { - // Dynamic == normal invocations - // Static(true) == calls to private members - case CALL_METHOD(msym, Dynamic | Static(true)) if !msym.isConstructor => - if (analyzeInc(msym, i, bb)) { - break - } - case _ => () + for (cm <- cms; if tfa.remainingCALLs.isDefinedAt(cm)) { + val analysis.CallsiteInfo(_, receiver, stackLength, concreteMethod) = tfa.remainingCALLs(cm) + if (analyzeInc(cm, bb, receiver, stackLength, concreteMethod)) { + break } - info = tfa.interpret(info, i) } } + } + + /* As part of inlining, some instructions are moved to a new block. + * In detail: the instructions moved to a new block originally appeared after a (by now inlined) callsite. + * Their new home is an `afterBlock` created by `doInline()` to that effect. + * Each block in staleIn is one such `afterBlock`. + * + * Some of those instructions may be CALL_METHOD possibly tracked in `remainingCALLs` + * (with an entry still noting the old containing block). However, that causes no problem: + * + * (1) such callsites won't be analyzed for inlining by `analyzeInc()` (*in this iteration*) + * because of the `break` that abandons the original basic block where it was contained. + * + * (2) Additionally, its new containing block won't be visited either (*in this iteration*) + * because the new blocks don't show up in the linearization computed before inlinings started: + * `for(bb <- tfa.callerLin; if tfa.preCandidates(bb)) {` + * + * For a next iteration, the new home of any instructions that have moved + * will be tracked properly in `remainingCALLs` after `MTFAGrowable.reinit()` puts on radar their new homes. + * + */ + if(retry) { + for(afterBlock <- staleIn) { + val justCALLsAfter = afterBlock.toList collect { case c : opcodes.CALL_METHOD => c } + for(ia <- justCALLsAfter) { tfa.remainingCALLs.remove(ia) } + } + } + /* + if(splicedBlocks.nonEmpty) { // TODO explore (saves time but leads to slightly different inlining decisions) + // opportunistically perform straightforward inlinings before the next typeflow round + val savedRetry = retry + val savedStaleOut = staleOut.toSet; staleOut.clear() + val savedStaleIn = staleIn.toSet ; staleIn.clear() + val howmany = inlineWithoutTFA(splicedBlocks, tfa.knownBeforehand) + splicedBlocks ++= staleIn + staleOut.clear(); staleOut ++= savedStaleOut; + staleIn.clear(); staleIn ++= savedStaleIn; + retry = savedRetry } + */ if (tfa.stat) log(m.symbol.fullName + " iterations: " + tfa.iterations + " (size: " + caller.length + ")") @@ -288,13 +466,6 @@ abstract class Inliners extends SubComponent { } } - private def isMonadicMethod(sym: Symbol) = { - nme.unspecializedName(sym.name) match { - case nme.foreach | nme.filter | nme.withFilter | nme.map | nme.flatMap => true - case _ => false - } - } - private def isHigherOrderMethod(sym: Symbol) = sym.isMethod && atPhase(currentRun.erasurePhase.prev)(sym.info.paramTypes exists isFunctionType) @@ -308,33 +479,6 @@ abstract class Inliners extends SubComponent { res } - /** Look up implementation of method 'sym in 'clazz'. - */ - def lookupImplFor(sym: Symbol, clazz: Symbol): Symbol = { - // TODO: verify that clazz.superClass is equivalent here to clazz.tpe.parents(0).typeSymbol (.tpe vs .info) - def needsLookup = ( - (clazz != NoSymbol) - && (clazz != sym.owner) - && !sym.isEffectivelyFinal - && clazz.isEffectivelyFinal - ) - def lookup(clazz: Symbol): Symbol = { - // println("\t\tlooking up " + meth + " in " + clazz.fullName + " meth.owner = " + meth.owner) - if (sym.owner == clazz || isBottomType(clazz)) sym - else sym.overridingSymbol(clazz) match { - case NoSymbol => if (sym.owner.isTrait) sym else lookup(clazz.superClass) - case imp => imp - } - } - if (needsLookup) { - val concreteMethod = lookup(clazz) - debuglog("\tlooked up method: " + concreteMethod.fullName) - - concreteMethod - } - else sym - } - class IMethodInfo(val m: IMethod) { val sym = m.symbol val name = sym.name @@ -386,10 +530,13 @@ abstract class Inliners extends SubComponent { /** Inline 'inc' into 'caller' at the given block and instruction. * The instruction must be a CALL_METHOD. */ - def doInline(block: BasicBlock, instr: Instruction) { + def doInline(block: BasicBlock, instr: CALL_METHOD) { staleOut += block + tfa.remainingCALLs.remove(instr) // this bookkpeeping is done here and not in MTFAGrowable.reinit due to (1st) convenience and (2nd) necessity. + tfa.isOnWatchlist.remove(instr) // ditto + val targetPos = instr.pos log("Inlining " + inc.m + " in " + caller.m + " at pos: " + posToStr(targetPos)) @@ -557,10 +704,10 @@ abstract class Inliners extends SubComponent { if (settings.debug.value) icodes.checkValid(caller.m) } - def isStampedForInlining(stack: TypeStack) = - !sameSymbols && inc.m.hasCode && shouldInline && isSafeToInline(stack) + def isStampedForInlining(stackLength: Int) = + !sameSymbols && inc.m.hasCode && shouldInline && isSafeToInline(stackLength) - def logFailure(stack: TypeStack) = log( + def logFailure(stackLength: Int) = log( """|inline failed for %s: | pair.sameSymbols: %s | inc.numInlined < 2: %s @@ -569,13 +716,13 @@ abstract class Inliners extends SubComponent { | shouldInline: %s """.stripMargin.format( inc.m, sameSymbols, inlinedMethodCount(inc.sym) < 2, - inc.m.hasCode, isSafeToInline(stack), shouldInline + inc.m.hasCode, isSafeToInline(stackLength), shouldInline ) ) - def failureReason(stack: TypeStack) = + def failureReason(stackLength: Int) = if (!inc.m.hasCode) "bytecode was unavailable" - else if (!isSafeToInline(stack)) "it is unsafe (target may reference private fields)" + else if (!isSafeToInline(stackLength)) "it is unsafe (target may reference private fields)" else "of a bug (run with -Ylog:inline -Ydebug for more information)" def canAccess(level: NonPublicRefs.Value) = level match { @@ -587,15 +734,26 @@ abstract class Inliners extends SubComponent { private def sameOwner = caller.owner == inc.owner /** A method is safe to inline when: - * - it does not contain calls to private methods when - * called from another class + * - it does not contain calls to private methods when called from another class * - it is not inlined into a position with non-empty stack, * while having a top-level finalizer (see liftedTry problem) * - it is not recursive * Note: * - synthetic private members are made public in this pass. */ - def isSafeToInline(stack: TypeStack): Boolean = { + def isSafeToInline(stackLength: Int): Boolean = { + + if(tfa.blackballed(inc.sym)) { return false } + if(tfa.knownSafe(inc.sym)) { return true } + + if(helperIsSafeToInline(stackLength)) { + tfa.knownSafe += inc.sym; true + } else { + tfa.knownUnsafe += inc.sym; false + } + } + + private def helperIsSafeToInline(stackLength: Int): Boolean = { def makePublic(f: Symbol): Boolean = (inc.m.sourceFile ne NoSourceFile) && (f.isSynthetic || f.isParamAccessor) && { debuglog("Making not-private symbol out of synthetic: " + f) @@ -642,9 +800,10 @@ abstract class Inliners extends SubComponent { }) canAccess(accessNeeded) && { - val isIllegalStack = (stack.length > inc.minimumStack && inc.hasNonFinalizerHandler) + val isIllegalStack = (stackLength > inc.minimumStack && inc.hasNonFinalizerHandler) + !isIllegalStack || { - debuglog("method " + inc.sym + " is used on a non-empty stack with finalizer. Stack: " + stack) + debuglog("method " + inc.sym + " is used on a non-empty stack with finalizer.") false } } |