diff options
author | Josh Rosen <joshrosen@databricks.com> | 2015-08-28 11:51:42 -0700 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2015-08-28 11:51:42 -0700 |
commit | d3f87dc39480f075170817bbd00142967a938078 (patch) | |
tree | 7f28c76093c43f44d6c3e2d5e5b3e7f12e00fd80 | |
parent | 499e8e154bdcc9d7b2f685b159e0ddb4eae48fe4 (diff) | |
download | spark-d3f87dc39480f075170817bbd00142967a938078.tar.gz spark-d3f87dc39480f075170817bbd00142967a938078.tar.bz2 spark-d3f87dc39480f075170817bbd00142967a938078.zip |
[SPARK-10325] Override hashCode() for public Row
This commit fixes an issue where the public SQL `Row` class did not override `hashCode`, causing it to violate the hashCode() + equals() contract. To fix this, I simply ported the `hashCode` implementation from the 1.4.x version of `Row`.
Author: Josh Rosen <joshrosen@databricks.com>
Closes #8500 from JoshRosen/SPARK-10325 and squashes the following commits:
51ffea1 [Josh Rosen] Override hashCode() for public Row.
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala | 13 | ||||
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/RowSuite.scala | 9 |
2 files changed, 22 insertions, 0 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
index cfd9cb0e62..ed2fdf9f2f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
@@ -18,6 +18,7 @@
 package org.apache.spark.sql
 
 import scala.collection.JavaConverters._
+import scala.util.hashing.MurmurHash3
 
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions.GenericRow
@@ -410,6 +411,18 @@ trait Row extends Serializable {
     true
   }
 
+  override def hashCode: Int = {
+    // Using Scala's Seq hash code implementation.
+    var n = 0
+    var h = MurmurHash3.seqSeed
+    val len = length
+    while (n < len) {
+      h = MurmurHash3.mix(h, apply(n).##)
+      n += 1
+    }
+    MurmurHash3.finalizeHash(h, n)
+  }
+
   /* ---------------------- utility methods for Scala ---------------------- */
 
   /**
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/RowSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/RowSuite.scala
index 795d4e983f..77ccd6f775 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/RowSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/RowSuite.scala
@@ -85,4 +85,13 @@ class RowSuite extends SparkFunSuite with SharedSQLContext {
     val r2 = Row(Double.NaN)
     assert(r1 === r2)
   }
+
+  test("equals and hashCode") {
+    val r1 = Row("Hello")
+    val r2 = Row("Hello")
+    assert(r1 === r2)
+    assert(r1.hashCode() === r2.hashCode())
+    val r3 = Row("World")
+    assert(r3.hashCode() != r1.hashCode())
+  }
 }