diff options
author | Josh Rosen <joshrosen@databricks.com> | 2015-08-28 11:51:42 -0700 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2015-08-28 12:05:37 -0700 |
commit | ccda27a9beb97b11c2522a0700165fd849af44b1 (patch) | |
tree | 605f301c793328333a436926a1e3263210b4c13c | |
parent | 0abbc181380e644374f4217ee84b76fae035aee2 (diff) | |
download | spark-ccda27a9beb97b11c2522a0700165fd849af44b1.tar.gz spark-ccda27a9beb97b11c2522a0700165fd849af44b1.tar.bz2 spark-ccda27a9beb97b11c2522a0700165fd849af44b1.zip |
[SPARK-10325] Override hashCode() for public Row
This commit fixes an issue where the public SQL `Row` class did not override `hashCode`, causing it to violate the hashCode() + equals() contract. To fix this, I simply ported the `hashCode` implementation from the 1.4.x version of `Row`.
Author: Josh Rosen <joshrosen@databricks.com>
Closes #8500 from JoshRosen/SPARK-10325 and squashes the following commits:
51ffea1 [Josh Rosen] Override hashCode() for public Row.
(cherry picked from commit d3f87dc39480f075170817bbd00142967a938078)
Signed-off-by: Michael Armbrust <michael@databricks.com>
Conflicts:
sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala | 15 | ||||
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/RowSuite.scala | 9 |
2 files changed, 23 insertions, 1 deletions
```diff
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
index ec895af9c3..088b7e1211 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
@@ -17,7 +17,8 @@
 
 package org.apache.spark.sql
 
-import org.apache.spark.sql.catalyst.InternalRow
+import scala.util.hashing.MurmurHash3
+
 import org.apache.spark.sql.catalyst.expressions.GenericRow
 import org.apache.spark.sql.types.StructType
 
@@ -410,6 +411,18 @@ trait Row extends Serializable {
     true
   }
 
+  override def hashCode: Int = {
+    // Using Scala's Seq hash code implementation.
+    var n = 0
+    var h = MurmurHash3.seqSeed
+    val len = length
+    while (n < len) {
+      h = MurmurHash3.mix(h, apply(n).##)
+      n += 1
+    }
+    MurmurHash3.finalizeHash(h, n)
+  }
+
   /* ---------------------- utility methods for Scala ---------------------- */
 
   /**
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/RowSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/RowSuite.scala
index 795d4e983f..77ccd6f775 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/RowSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/RowSuite.scala
@@ -85,4 +85,13 @@ class RowSuite extends SparkFunSuite with SharedSQLContext {
     val r2 = Row(Double.NaN)
     assert(r1 === r2)
   }
+
+  test("equals and hashCode") {
+    val r1 = Row("Hello")
+    val r2 = Row("Hello")
+    assert(r1 === r2)
+    assert(r1.hashCode() === r2.hashCode())
+    val r3 = Row("World")
+    assert(r3.hashCode() != r1.hashCode())
+  }
 }
```