author    Josh Rosen <joshrosen@databricks.com>  2016-06-04 14:14:50 -0700
committer Josh Rosen <joshrosen@databricks.com>  2016-06-04 14:14:50 -0700
commit    091f81e1f7ef1581376c71e3872ce06f4c1713bd (patch)
tree      3a136680bd86eea5cff5954554647389c43649eb
parent    681387b2dc9a094cfba84188a1dd1ac9192bb99c (diff)
download  spark-091f81e1f7ef1581376c71e3872ce06f4c1713bd.tar.gz
          spark-091f81e1f7ef1581376c71e3872ce06f4c1713bd.tar.bz2
          spark-091f81e1f7ef1581376c71e3872ce06f4c1713bd.zip
[SPARK-15762][SQL] Cache Metadata & StructType hashCodes; use singleton Metadata.empty
We should cache `Metadata.hashCode` and use a singleton for `Metadata.empty` because calculating metadata hashCodes appears to be a bottleneck for certain workloads. We should also cache `StructType.hashCode`.

In an optimizer stress-test benchmark run by ericl, these `hashCode` calls accounted for roughly 40% of the total CPU time, and this bottleneck was completely eliminated by the caching added by this patch.

Author: Josh Rosen <joshrosen@databricks.com>

Closes #13504 from JoshRosen/metadata-fix.
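A minimal sketch (using a hypothetical `Meta` class, not the actual Spark sources) of the two changes this patch makes: memoize the expensive hash in a `lazy val` so repeated `hashCode` calls pay the cost only once, and hand out a single shared empty instance instead of allocating a new one per call.

    // Toy stand-in for Metadata: the lazy val caches the hash on first use,
    // and the companion object exposes one shared empty instance.
    sealed class Meta private (private val map: Map[String, Any]) {
      // map.hashCode() stands in for the costly Metadata.hash(this) computation.
      private lazy val _hashCode: Int = map.hashCode()
      override def hashCode: Int = _hashCode

      override def equals(other: Any): Boolean = other match {
        case that: Meta => this.map == that.map
        case _          => false
      }
    }

    object Meta {
      // Singleton: every caller of Meta.empty now shares this one instance.
      private[this] val _empty = new Meta(Map.empty)

      /** Returns the shared empty Meta. */
      def empty: Meta = _empty
    }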
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/types/Metadata.scala    7
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala  3
2 files changed, 7 insertions, 3 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Metadata.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Metadata.scala
index 1fb2e2404c..657bd86ce1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Metadata.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Metadata.scala
@@ -104,7 +104,8 @@ sealed class Metadata private[types] (private[types] val map: Map[String, Any])
}
}
- override def hashCode: Int = Metadata.hash(this)
+ private lazy val _hashCode: Int = Metadata.hash(this)
+ override def hashCode: Int = _hashCode
private def get[T](key: String): T = {
map(key).asInstanceOf[T]
@@ -115,8 +116,10 @@ sealed class Metadata private[types] (private[types] val map: Map[String, Any])
object Metadata {
+ private[this] val _empty = new Metadata(Map.empty)
+
/** Returns an empty Metadata. */
- def empty: Metadata = new Metadata(Map.empty)
+ def empty: Metadata = _empty
/** Creates a Metadata instance from JSON. */
def fromJson(json: String): Metadata = {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
index fd2b524e22..9a92373759 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala
@@ -112,7 +112,8 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[Stru
}
}
- override def hashCode(): Int = java.util.Arrays.hashCode(fields.asInstanceOf[Array[AnyRef]])
+ private lazy val _hashCode: Int = java.util.Arrays.hashCode(fields.asInstanceOf[Array[AnyRef]])
+ override def hashCode(): Int = _hashCode
/**
* Creates a new [[StructType]] by adding a new field.
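The hot path this helps: schema and metadata objects used as keys in hash-based lookups get hashed over and over during optimization. A rough illustration of that access pattern (an assumed demo program, not part of this commit):

    // Assumed demo, not part of this patch: repeatedly hashing the same
    // StructType, e.g. as a HashMap key, recomputes nothing after the first
    // hashCode call because the value is now cached in a lazy val.
    import org.apache.spark.sql.types._
    import scala.collection.mutable

    object HashCodeCachingDemo {
      def main(args: Array[String]): Unit = {
        val schema = StructType(Seq(
          StructField("id", LongType),
          StructField("name", StringType, nullable = true, Metadata.empty)))

        val counts = mutable.HashMap.empty[StructType, Int]
        // Each lookup hashes the key; only the first call does real work now.
        (1 to 1000).foreach(_ => counts(schema) = counts.getOrElse(schema, 0) + 1)
        println(counts(schema)) // 1000
      }
    }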