From 96f47647381425be5c8c0e771590d6abe0f4d231 Mon Sep 17 00:00:00 2001 From: Jason Zaugg Date: Wed, 8 Oct 2014 08:53:52 +1000 Subject: Optimize tail calls to avoid findMember calls Cache the member symbols for `Boolean.{||, &&}` per-run, rather than look them up repeatedly. Based on profiling the tail calls phase in the program below, which was distilled by @rmacleod2 from the code generated by macros in https://github.com/paytronix/utils-open/tree/release/2014/ernststavrosgrouper Wall clock time went from 12s to 6.5s. ```scala object Test { def a(): Option[String] = Some("a") def main(args: Array[String]) { a() match { case Some(b1) => a() match { case Some(b2) => a() match { case Some(b3) => a() match { case Some(b4) => a() match { case Some(b5) => a() match { case Some(b6) => a() match { case Some(b7) => a() match { case Some(b8) => a() match { case Some(b9) => a() match { case Some(b10) => a() match { case Some(b11) => a() match { case Some(b12) => a() match { case Some(b13) => a() match { case Some(b14) => a() match { case Some(b15) => a() match { case Some(b16) => a() match { case Some(b17) => a() match { case Some(b18) => a() match { case Some(b19) => a() match { case Some(b20) => println("yay") case None => None } case None => None } case None => None } case None => None } case None => None } case None => None } case None => None } case None => None } case None => None } case None => None } case None => None } case None => None } case None => None } case None => None } case None => None } case None => None } case None => None } case None => None } case None => None } case None => None } } } ``` --- src/compiler/scala/tools/nsc/transform/TailCalls.scala | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src/compiler') diff --git a/src/compiler/scala/tools/nsc/transform/TailCalls.scala b/src/compiler/scala/tools/nsc/transform/TailCalls.scala index ef534f70fd..93a343675b 100644 --- a/src/compiler/scala/tools/nsc/transform/TailCalls.scala +++ b/src/compiler/scala/tools/nsc/transform/TailCalls.scala @@ -265,6 +265,10 @@ abstract class TailCalls extends Transform { !(sym.hasAccessorFlag || sym.isConstructor) } + // intentionally shadowing imports from definitions for performance + val runDefinitions = currentRun.runDefinitions + import runDefinitions.{Boolean_or, Boolean_and} + tree match { case ValDef(_, _, _, _) => if (tree.symbol.isLazy && tree.symbol.hasAnnotation(TailrecClass)) @@ -421,6 +425,10 @@ abstract class TailCalls extends Transform { def traverseNoTail(tree: Tree) = traverse(tree, maybeTailNew = false) def traverseTreesNoTail(trees: List[Tree]) = trees foreach traverseNoTail + // intentionally shadowing imports from definitions for performance + private val runDefinitions = currentRun.runDefinitions + import runDefinitions.{Boolean_or, Boolean_and} + override def traverse(tree: Tree) = tree match { // we're looking for label(x){x} in tail position, since that means `a` is in tail position in a call `label(a)` case LabelDef(_, List(arg), body@Ident(_)) if arg.symbol == body.symbol => -- cgit v1.2.3 From a52b9a776bdcd01d0b706727bc89e060817137aa Mon Sep 17 00:00:00 2001 From: Jason Zaugg Date: Wed, 8 Oct 2014 11:24:47 +1000 Subject: Avoid needlessly deep chains of ClonedTailContext I believe ClonedTailContext was added to avoid the need to mutate the Boolean `ctx.tailPos`. All other calls are forwarded to a delegate context. This commit tries to find a delegate context with the right value of `tailPos` to reuse that, rather than creating a wrapper each time we need to flip that bit. --- .../scala/tools/nsc/transform/TailCalls.scala | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'src/compiler') diff --git a/src/compiler/scala/tools/nsc/transform/TailCalls.scala b/src/compiler/scala/tools/nsc/transform/TailCalls.scala index 93a343675b..868ee4489d 100644 --- a/src/compiler/scala/tools/nsc/transform/TailCalls.scala +++ b/src/compiler/scala/tools/nsc/transform/TailCalls.scala @@ -129,6 +129,13 @@ abstract class TailCalls extends Transform { } override def toString = s"${method.name} tparams=$tparams tailPos=$tailPos label=$label label info=${label.info}" + final def noTailContext() = clonedTailContext(false) + final def yesTailContext() = clonedTailContext(true) + protected def clonedTailContext(tailPos: Boolean): TailContext = this match { + case _ if this.tailPos == tailPos => this + case clone: ClonedTailContext => clone.that.clonedTailContext(tailPos) + case _ => new ClonedTailContext(this, tailPos) + } } object EmptyTailContext extends TailContext { @@ -174,7 +181,7 @@ abstract class TailCalls extends Transform { } def containsRecursiveCall(t: Tree) = t exists isRecursiveCall } - class ClonedTailContext(that: TailContext, override val tailPos: Boolean) extends TailContext { + class ClonedTailContext(val that: TailContext, override val tailPos: Boolean) extends TailContext { def method = that.method def tparams = that.tparams def methodPos = that.methodPos @@ -183,9 +190,6 @@ abstract class TailCalls extends Transform { } private var ctx: TailContext = EmptyTailContext - private def noTailContext() = new ClonedTailContext(ctx, tailPos = false) - private def yesTailContext() = new ClonedTailContext(ctx, tailPos = true) - override def transformUnit(unit: CompilationUnit): Unit = { try { @@ -206,11 +210,11 @@ abstract class TailCalls extends Transform { finally this.ctx = saved } - def yesTailTransform(tree: Tree): Tree = transform(tree, yesTailContext()) - def noTailTransform(tree: Tree): Tree = transform(tree, noTailContext()) + def yesTailTransform(tree: Tree): Tree = transform(tree, ctx.yesTailContext()) + def noTailTransform(tree: Tree): Tree = transform(tree, ctx.noTailContext()) def noTailTransforms(trees: List[Tree]) = { - val nctx = noTailContext() - trees map (t => transform(t, nctx)) + val nctx = ctx.noTailContext() + trees mapConserve (t => transform(t, nctx)) } override def transform(tree: Tree): Tree = { -- cgit v1.2.3 From d6cb47cbc4bc5468b42eda87a90c865728b84c6a Mon Sep 17 00:00:00 2001 From: Jason Zaugg Date: Wed, 8 Oct 2014 11:28:10 +1000 Subject: Wider use of mapConserve and the like in TailCalls Save the Trees! When a subtransform is an identity, we must strive to return the identical tree to enable the lazy part of LazyTreeCopier. If we get this wrong in a leaf, all parents are wastefully copied. --- src/compiler/scala/tools/nsc/transform/TailCalls.scala | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src/compiler') diff --git a/src/compiler/scala/tools/nsc/transform/TailCalls.scala b/src/compiler/scala/tools/nsc/transform/TailCalls.scala index 868ee4489d..49bca5364c 100644 --- a/src/compiler/scala/tools/nsc/transform/TailCalls.scala +++ b/src/compiler/scala/tools/nsc/transform/TailCalls.scala @@ -320,8 +320,13 @@ abstract class TailCalls extends Transform { // the assumption is once we encounter a case, the remainder of the block will consist of cases // the prologue may be empty, usually it is the valdef that stores the scrut val (prologue, cases) = stats span (s => !s.isInstanceOf[LabelDef]) + val transformedPrologue = noTailTransforms(prologue) + val transformedCases = transformTrees(cases) + val transformedStats = + if ((prologue eq transformedPrologue) && (cases eq transformedCases)) stats // allow reuse of `tree` if the subtransform was an identity + else transformedPrologue ++ transformedCases treeCopy.Block(tree, - noTailTransforms(prologue) ++ transformTrees(cases), + transformedStats, transform(expr) ) -- cgit v1.2.3 From f6d72991bbba9a9610a9873424b3506f2b26be3f Mon Sep 17 00:00:00 2001 From: Jason Zaugg Date: Wed, 8 Oct 2014 11:30:23 +1000 Subject: SI-8893 Restore linear perf in TailCalls with nested matches Compilation perfomance of the enclosed test regressed when the new pattern matcher was introduced, specifically when the tail call elimination phase was made aware of its tree shapes in cd3d34203. The code added in that commit detects an application to a tail label in order to treat recursive calls in the argument as in tail position. If the transform of that argument makes no change, it falls back to `rewriteApply`, which transforms the argument again (although this time on a non-tail-position context.) This commit avoids the second transform by introducing a flag to `rewriteApply` to mark the arguments are pre-transformed. I don't yet see how that fixes the exponential performance, as on the surface it seems like a constant factor improvement. But the numbers don't lie, and we can now compile the test case in seconds, rather than before when it was running for > 10 minutes. This test case was based on a code pattern generated by the Avro serializer macro in: https://github.com/paytronix/utils-open/tree/release/2014/ernststavrosgrouper The exponential performance in that context is visualed here: https://twitter.com/dridus/status/519544110173007872 Thanks for @rmacleod2 for minimizing the problem. --- .../scala/tools/nsc/transform/TailCalls.scala | 6 +- test/files/pos/t8893.scala | 129 +++++++++++++++++++++ 2 files changed, 132 insertions(+), 3 deletions(-) create mode 100644 test/files/pos/t8893.scala (limited to 'src/compiler') diff --git a/src/compiler/scala/tools/nsc/transform/TailCalls.scala b/src/compiler/scala/tools/nsc/transform/TailCalls.scala index 49bca5364c..16ea3ea90f 100644 --- a/src/compiler/scala/tools/nsc/transform/TailCalls.scala +++ b/src/compiler/scala/tools/nsc/transform/TailCalls.scala @@ -219,7 +219,7 @@ abstract class TailCalls extends Transform { override def transform(tree: Tree): Tree = { /* A possibly polymorphic apply to be considered for tail call transformation. */ - def rewriteApply(target: Tree, fun: Tree, targs: List[Tree], args: List[Tree]) = { + def rewriteApply(target: Tree, fun: Tree, targs: List[Tree], args: List[Tree], mustTransformArgs: Boolean = true) = { val receiver: Tree = fun match { case Select(qual, _) => qual case _ => EmptyTree @@ -227,7 +227,7 @@ abstract class TailCalls extends Transform { def receiverIsSame = ctx.enclosingType.widen =:= receiver.tpe.widen def receiverIsSuper = ctx.enclosingType.widen <:< receiver.tpe.widen def isRecursiveCall = (ctx.method eq fun.symbol) && ctx.tailPos - def transformArgs = noTailTransforms(args) + def transformArgs = if (mustTransformArgs) noTailTransforms(args) else args def matchesTypeArgs = ctx.tparams sameElements (targs map (_.tpe.typeSymbol)) /* Records failure reason in Context for reporting. @@ -393,7 +393,7 @@ abstract class TailCalls extends Transform { if (res ne arg) treeCopy.Apply(tree, fun, res :: Nil) else - rewriteApply(fun, fun, Nil, args) + rewriteApply(fun, fun, Nil, args, mustTransformArgs = false) case Apply(fun, args) => rewriteApply(fun, fun, Nil, args) diff --git a/test/files/pos/t8893.scala b/test/files/pos/t8893.scala new file mode 100644 index 0000000000..b87c8bdd3c --- /dev/null +++ b/test/files/pos/t8893.scala @@ -0,0 +1,129 @@ +// Took > 10 minutes to run the tail call phase. +object Test { + def a(): Option[String] = Some("a") + + def main(args: Array[String]) { + a() match { + case Some(b1) => + a() match { + case Some(b2) => + a() match { + case Some(b3) => + a() match { + case Some(b4) => + a() match { + case Some(b5) => + a() match { + case Some(b6) => + a() match { + case Some(b7) => + a() match { + case Some(b8) => + a() match { + case Some(b9) => + a() match { + case Some(b10) => + a() match { + case Some(b11) => + a() match { + case Some(b12) => + a() match { + case Some(b13) => + a() match { + case Some(b14) => + a() match { + case Some(b15) => + a() match { + case Some(b16) => + a() match { + case Some(b17) => + a() match { + case Some(b18) => + a() match { + case Some(b19) => + a() match { + case Some(b20) => + a() match { + case Some(b21) => + a() match { + case Some(b22) => + a() match { + case Some(b23) => + a() match { + case Some(b24) => + a() match { + case Some(b25) => + a() match { + case Some(b26) => + a() match { + case Some(b27) => + a() match { + case Some(b28) => + a() match { + case Some(b29) => + a() match { + case Some(b30) => + println("yay") + case None => None + } + case None => None + } + case None => None + } + case None => None + } + case None => None + } + case None => None + } + case None => None + } + case None => None + } + case None => None + } + case None => None + } + case None => None + } + case None => None + } + case None => None + } + case None => None + } + case None => None + } + case None => None + } + case None => None + } + case None => None + } + case None => None + } + case None => None + } + case None => None + } + case None => None + } + case None => None + } + case None => None + } + case None => None + } + case None => None + } + case None => None + } + case None => None + } + case None => None + } + case None => None + } + } +} + -- cgit v1.2.3 From 10b0d216a3e3b011103a7c939fabe441440f9aa1 Mon Sep 17 00:00:00 2001 From: Jason Zaugg Date: Wed, 8 Oct 2014 13:09:24 +1000 Subject: Avoid wasteful array creation in backend The iteration is only needed to side effect on `instructionList` and `code.touched`. This commit uses an iterator and `foreach`, rather than creating throwaway Arrays. % time (for f in {1..200}; do echo test/files/pos/t8893.scala; done | qscalac -Xresident) Before: 30s After : 21s --- .../scala/tools/nsc/backend/icode/BasicBlocks.scala | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'src/compiler') diff --git a/src/compiler/scala/tools/nsc/backend/icode/BasicBlocks.scala b/src/compiler/scala/tools/nsc/backend/icode/BasicBlocks.scala index f9551697d2..ad1975ef23 100644 --- a/src/compiler/scala/tools/nsc/backend/icode/BasicBlocks.scala +++ b/src/compiler/scala/tools/nsc/backend/icode/BasicBlocks.scala @@ -300,14 +300,16 @@ trait BasicBlocks { if (!closed) instructionList = instructionList map (x => map.getOrElse(x, x)) else - instrs.zipWithIndex collect { - case (oldInstr, i) if map contains oldInstr => - // SI-6288 clone important here because `replaceInstruction` assigns - // a position to `newInstr`. Without this, a single instruction can - // be added twice, and the position last position assigned clobbers - // all previous positions in other usages. - val newInstr = map(oldInstr).clone() - code.touched |= replaceInstruction(i, newInstr) + instrs.iterator.zipWithIndex foreach { + case (oldInstr, i) => + if (map contains oldInstr) { + // SI-6288 clone important here because `replaceInstruction` assigns + // a position to `newInstr`. Without this, a single instruction can + // be added twice, and the position last position assigned clobbers + // all previous positions in other usages. + val newInstr = map(oldInstr).clone() + code.touched |= replaceInstruction(i, newInstr) + } } ////////////////////// Emit ////////////////////// -- cgit v1.2.3