aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Josh Rosen <joshrosen@databricks.com> 2015-08-28 11:51:42 -0700
committer Michael Armbrust <michael@databricks.com> 2015-08-28 12:05:37 -0700
commit ccda27a9beb97b11c2522a0700165fd849af44b1 (patch)
tree 605f301c793328333a436926a1e3263210b4c13c
parent 0abbc181380e644374f4217ee84b76fae035aee2 (diff)
download spark-ccda27a9beb97b11c2522a0700165fd849af44b1.tar.gz
spark-ccda27a9beb97b11c2522a0700165fd849af44b1.tar.bz2
spark-ccda27a9beb97b11c2522a0700165fd849af44b1.zip
[SPARK-10325] Override hashCode() for public Row
This commit fixes an issue where the public SQL `Row` class did not override `hashCode`, causing it to violate the hashCode() + equals() contract. To fix this, I simply ported the `hashCode` implementation from the 1.4.x version of `Row`. Author: Josh Rosen <joshrosen@databricks.com> Closes #8500 from JoshRosen/SPARK-10325 and squashes the following commits: 51ffea1 [Josh Rosen] Override hashCode() for public Row. (cherry picked from commit d3f87dc39480f075170817bbd00142967a938078) Signed-off-by: Michael Armbrust <michael@databricks.com> Conflicts: sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala 15
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/RowSuite.scala 9
2 files changed, 23 insertions, 1 deletion
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
index ec895af9c3..088b7e1211 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
@@ -17,7 +17,8 @@
package org.apache.spark.sql
-import org.apache.spark.sql.catalyst.InternalRow
+import scala.util.hashing.MurmurHash3
+
import org.apache.spark.sql.catalyst.expressions.GenericRow
import org.apache.spark.sql.types.StructType
@@ -410,6 +411,18 @@ trait Row extends Serializable {
true
}
+ override def hashCode: Int = { // hash built from every field of the row, visited in index order
+ // Using Scala's Seq hash code implementation: start from seqSeed, mix in each element's hash, then finalize with the element count.
+ var n = 0 // index of the next field to fold in; also the number of fields mixed so far
+ var h = MurmurHash3.seqSeed // running hash state
+ val len = length // read the field count once, before the loop, instead of on every loop test
+ while (n < len) {
+ h = MurmurHash3.mix(h, apply(n).##) // `##` is Scala's null-safe hash, so a null field does not throw here
+ n += 1
+ }
+ MurmurHash3.finalizeHash(h, n) // finalized with the number of fields that were mixed in
+ }
+
/* ---------------------- utility methods for Scala ---------------------- */
/**
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/RowSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/RowSuite.scala
index 795d4e983f..77ccd6f775 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/RowSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/RowSuite.scala
@@ -85,4 +85,13 @@ class RowSuite extends SparkFunSuite with SharedSQLContext {
val r2 = Row(Double.NaN)
assert(r1 === r2)
}
+
+ test("equals and hashCode") { // regression test: rows that are equal must also hash equal
+ val r1 = Row("Hello")
+ val r2 = Row("Hello") // separately constructed row with the same single field value as r1
+ assert(r1 === r2) // the two rows compare equal ...
+ assert(r1.hashCode() === r2.hashCode()) // ... so their hash codes must match as well
+ val r3 = Row("World") // row with a different field value
+ assert(r3.hashCode() != r1.hashCode()) // expected to differ for these inputs; not a general hashCode guarantee
+ }
}