author     Josh Rosen <joshrosen@databricks.com>    2016-06-03 13:53:02 -0700
committer  Josh Rosen <joshrosen@databricks.com>    2016-06-03 13:53:02 -0700
commit     e526913989d6099064886ea3ed3f6a2a0376a4f8 (patch)
tree       e9f81934a0bda3e8ac2fbaf4fc50ccb249761eed
parent     efd3b11a47ec553f68ab6aa714167f3aee2ecf56 (diff)
[SPARK-15742][SQL] Reduce temp collections allocations in TreeNode transform methods
In Catalyst's TreeNode transform methods we end up calling `productIterator.map(...).toArray` in a number of places, which is slightly inefficient because it needs to allocate an `ArrayBuilder` and grow a temporary array. Since we already know the size of the final output (`productArity`), we can simply allocate an array up-front and use a while loop to consume the iterator and populate the array. For most workloads this performance difference is negligible, but it does make a measurable difference in optimizer performance for queries that operate over very wide schemas (such as the benchmark queries in #13456).

### Perf results (from #13456 benchmarks)

**Before**

```
Java HotSpot(TM) 64-Bit Server VM 1.8.0_66-b17 on Mac OS X 10.10.5
Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz

parsing large select:               Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------
1 select expressions                       19 /   22          0.0    19119858.0       1.0X
10 select expressions                      23 /   25          0.0    23208774.0       0.8X
100 select expressions                     55 /   73          0.0    54768402.0       0.3X
1000 select expressions                    229 /  259         0.0    228606373.0      0.1X
2500 select expressions                    530 /  554         0.0    529938178.0      0.0X
```

**After**

```
parsing large select:               Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
------------------------------------------------------------------------------------------------
1 select expressions                       15 /   21          0.0    14978203.0       1.0X
10 select expressions                      22 /   27          0.0    22492262.0       0.7X
100 select expressions                     48 /   64          0.0    48449834.0       0.3X
1000 select expressions                    189 /  208         0.0    189346428.0      0.1X
2500 select expressions                    429 /  449         0.0    428943897.0      0.0X
```

Author: Josh Rosen <joshrosen@databricks.com>

Closes #13484 from JoshRosen/treenode-productiterator-map.
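For readers skimming the patch, the before/after pattern is sketched below in self-contained form. The names `MapProductSketch`, `viaIterator`, and `viaWhileLoop` are hypothetical, for illustration only; the actual helper added by this patch is `mapProductIterator` in `TreeNode.scala` (see the diff below).

```scala
import scala.reflect.ClassTag

// Hypothetical sketch: any Product (e.g. a case class) exposes productArity
// and productElement, so the temporary ArrayBuilder behind
// productIterator.map(f).toArray can be avoided with a pre-sized array.
object MapProductSketch {

  // Before: allocates an Iterator plus an ArrayBuilder that grows a temp array.
  def viaIterator[B: ClassTag](p: Product, f: Any => B): Array[B] =
    p.productIterator.map(f).toArray

  // After: the output size is known up-front (productArity), so allocate once
  // and fill with a while loop.
  def viaWhileLoop[B: ClassTag](p: Product, f: Any => B): Array[B] = {
    val arr = Array.ofDim[B](p.productArity)
    var i = 0
    while (i < arr.length) {
      arr(i) = f(p.productElement(i))
      i += 1
    }
    arr
  }

  def main(args: Array[String]): Unit = {
    case class Node(left: String, right: String, tag: Int)
    val n = Node("a", "b", 42)
    // Both variants produce the same result: Array("a", "b", 42).
    assert(viaIterator[Any](n, identity).sameElements(viaWhileLoop[Any](n, identity)))
  }
}
```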
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala |  6
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala  | 26
2 files changed, 23 insertions(+), 9 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
index 6784c3ae1d..3de15a9a3f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
@@ -172,7 +172,7 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT
       case null => null
     }
 
-    val newArgs = productIterator.map(recursiveTransform).toArray
+    val newArgs = mapProductIterator(recursiveTransform)
 
     if (changed) makeCopy(newArgs).asInstanceOf[this.type] else this
   }
@@ -206,7 +206,7 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT
       case null => null
     }
 
-    val newArgs = productIterator.map(recursiveTransform).toArray
+    val newArgs = mapProductIterator(recursiveTransform)
 
     if (changed) makeCopy(newArgs).asInstanceOf[this.type] else this
   }
@@ -318,7 +318,7 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT
       case other => other
     }
 
-    productIterator.map {
+    mapProductIterator {
       case s: Option[_] => s.map(cleanArg)
       case s: Seq[_] => s.map(cleanArg)
       case m: Map[_, _] => m.mapValues(cleanArg)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
index 50481cd298..22d82c6108 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
@@ -21,6 +21,7 @@ import java.util.UUID
 
 import scala.collection.Map
 import scala.collection.mutable.Stack
+import scala.reflect.ClassTag
 
 import org.apache.commons.lang.ClassUtils
 import org.json4s.JsonAST._
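An editorial aside, not part of the patch text: the new `scala.reflect.ClassTag` import exists because `Array.ofDim[B]` must construct an array of the erased element type at runtime. A minimal illustration, using a hypothetical `zeros` helper:

```scala
import scala.reflect.ClassTag

object ClassTagDemo {
  // Arrays are reified on the JVM, so constructing an Array[B] requires
  // runtime evidence of B; the ClassTag context bound supplies it. Without
  // the bound, Array.ofDim[B](n) would not compile.
  def zeros[B: ClassTag](n: Int): Array[B] = Array.ofDim[B](n)

  def main(args: Array[String]): Unit = {
    println(zeros[Int](3).mkString(","))    // prints 0,0,0
    println(zeros[String](2).mkString(",")) // prints null,null
  }
}
```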
@@ -169,11 +170,24 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product {
   }
 
   /**
+   * Efficient alternative to `productIterator.map(f).toArray`.
+   */
+  protected def mapProductIterator[B: ClassTag](f: Any => B): Array[B] = {
+    val arr = Array.ofDim[B](productArity)
+    var i = 0
+    while (i < arr.length) {
+      arr(i) = f(productElement(i))
+      i += 1
+    }
+    arr
+  }
+
+  /**
    * Returns a copy of this node where `f` has been applied to all the nodes children.
    */
   def mapChildren(f: BaseType => BaseType): BaseType = {
     var changed = false
-    val newArgs = productIterator.map {
+    val newArgs = mapProductIterator {
       case arg: TreeNode[_] if containsChild(arg) =>
         val newChild = f(arg.asInstanceOf[BaseType])
         if (newChild fastEquals arg) {
@@ -184,7 +198,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product {
         }
       case nonChild: AnyRef => nonChild
       case null => null
-    }.toArray
+    }
     if (changed) makeCopy(newArgs) else this
   }
 
@@ -197,7 +211,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product {
     var changed = false
     val remainingNewChildren = newChildren.toBuffer
     val remainingOldChildren = children.toBuffer
-    val newArgs = productIterator.map {
+    val newArgs = mapProductIterator {
       case s: StructType => s // Don't convert struct types to some other type of Seq[StructField]
       // Handle Seq[TreeNode] in TreeNode parameters.
       case s: Seq[_] => s.map {
@@ -237,7 +251,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product {
         }
       case nonChild: AnyRef => nonChild
       case null => null
-    }.toArray
+    }
     if (changed) makeCopy(newArgs) else this
   }
 
@@ -302,7 +316,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product {
       rule: PartialFunction[BaseType, BaseType],
       nextOperation: (BaseType, PartialFunction[BaseType, BaseType]) => BaseType): BaseType = {
     var changed = false
-    val newArgs = productIterator.map {
+    val newArgs = mapProductIterator {
       case arg: TreeNode[_] if containsChild(arg) =>
         val newChild = nextOperation(arg.asInstanceOf[BaseType], rule)
         if (!(newChild fastEquals arg)) {
@@ -353,7 +367,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product {
         }
       case nonChild: AnyRef => nonChild
       case null => null
-    }.toArray
+    }
     if (changed) makeCopy(newArgs) else this
   }
 
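The perf numbers in the commit message come from the #13456 parser benchmarks. As a rough standalone illustration only (not the harness used above), a crude timing loop like the following shows the same shape of difference; `QuickTiming`, `Wide`, and the iteration count are arbitrary choices, and real measurements should use a proper harness such as JMH:

```scala
object QuickTiming {
  // An arbitrary 8-field Product standing in for a "wide" TreeNode.
  final case class Wide(a: Int, b: Int, c: Int, d: Int, e: Int, f: Int, g: Int, h: Int)

  def timeIt(label: String)(body: => Unit): Unit = {
    val start = System.nanoTime()
    body
    println(f"$label: ${(System.nanoTime() - start) / 1e6}%.1f ms")
  }

  def main(args: Array[String]): Unit = {
    val p: Product = Wide(1, 2, 3, 4, 5, 6, 7, 8)
    val f: Any => Any = identity
    val iters = 1000000 // arbitrary; tune for your machine, and warm up the JIT first

    timeIt("productIterator.map(f).toArray") {
      var i = 0
      while (i < iters) { p.productIterator.map(f).toArray; i += 1 }
    }

    timeIt("pre-sized array + while loop") {
      var i = 0
      while (i < iters) {
        val arr = new Array[Any](p.productArity)
        var j = 0
        while (j < arr.length) { arr(j) = f(p.productElement(j)); j += 1 }
        i += 1
      }
    }
  }
}
```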