From bdde010edbc79e506e183e2b9a2b9b19f7b226fb Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Sat, 23 Apr 2016 13:42:44 +0800 Subject: [SPARK-14863][SQL] Cache TreeNode's hashCode by default Caching TreeNode's `hashCode` can lead to orders-of-magnitude performance improvement in certain optimizer rules when operating on huge/complex schemas. Author: Josh Rosen Closes #12626 from JoshRosen/cache-treenode-hashcode. --- .../main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala index 3d0e016a09..5eb8fdf048 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala @@ -71,7 +71,9 @@ object CurrentOrigin { } } +// scalastyle:off abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product { +// scalastyle:on self: BaseType => val origin: Origin = CurrentOrigin.get @@ -84,6 +86,9 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product { lazy val containsChild: Set[TreeNode[_]] = children.toSet + private lazy val _hashCode: Int = scala.util.hashing.MurmurHash3.productHash(this) + override def hashCode(): Int = _hashCode + /** * Faster version of equality which short-circuits when two treeNodes are the same instance. * We don't just override Object.equals, as doing so prevents the scala compiler from -- cgit v1.2.3