aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Josh Rosen <joshrosen@databricks.com>, 2016-04-23 13:42:44 +0800
committer: Wenchen Fan <wenchen@databricks.com>, 2016-04-23 13:42:44 +0800
commit: bdde010edbc79e506e183e2b9a2b9b19f7b226fb (patch)
tree: 23cd09d32b517281d3de58c60809b98f7292cbcf
parent: 39a77e15678f62b0b2b00b9ab9ee0e5560a0f14d (diff)
download: spark-bdde010edbc79e506e183e2b9a2b9b19f7b226fb.tar.gz
spark-bdde010edbc79e506e183e2b9a2b9b19f7b226fb.tar.bz2
spark-bdde010edbc79e506e183e2b9a2b9b19f7b226fb.zip
[SPARK-14863][SQL] Cache TreeNode's hashCode by default
Caching TreeNode's `hashCode` can lead to orders-of-magnitude performance improvements in certain optimizer rules when operating on huge/complex schemas.

Author: Josh Rosen <joshrosen@databricks.com>

Closes #12626 from JoshRosen/cache-treenode-hashcode.
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala | 5
1 file changed, 5 insertions, 0 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
index 3d0e016a09..5eb8fdf048 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
@@ -71,7 +71,9 @@ object CurrentOrigin {
}
}
+// scalastyle:off
abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product {
+// scalastyle:on
self: BaseType =>
val origin: Origin = CurrentOrigin.get
@@ -84,6 +86,9 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product {
lazy val containsChild: Set[TreeNode[_]] = children.toSet
+ private lazy val _hashCode: Int = scala.util.hashing.MurmurHash3.productHash(this)
+ override def hashCode(): Int = _hashCode
+
/**
* Faster version of equality which short-circuits when two treeNodes are the same instance.
* We don't just override Object.equals, as doing so prevents the scala compiler from