diff options
author | Joan <joan@goyeau.com> | 2016-04-22 12:24:12 +0100 |
---|---|---|
committer | Sean Owen <sowen@cloudera.com> | 2016-04-22 12:24:12 +0100 |
commit | bf95b8da2774620cd62fa36bd8bf37725ad3fc7d (patch) | |
tree | b257a13641f72ed5b0b0eff34ef0bf64374c7c1d /core | |
parent | e09ab5da8b02da98d7b2496d549c1d53cceb8728 (diff) | |
download | spark-bf95b8da2774620cd62fa36bd8bf37725ad3fc7d.tar.gz spark-bf95b8da2774620cd62fa36bd8bf37725ad3fc7d.tar.bz2 spark-bf95b8da2774620cd62fa36bd8bf37725ad3fc7d.zip |
[SPARK-6429] Implement hashCode and equals together
## What changes were proposed in this pull request?
Implement `hashCode` and `equals` together in several classes so that the corresponding scalastyle check can be enabled.
This is a first batch; I will continue with the remaining classes, but I wanted to get your thoughts first.
Author: Joan <joan@goyeau.com>
Closes #12157 from joan38/SPARK-6429-HashCode-Equals.
Diffstat (limited to 'core')
10 files changed, 38 insertions, 12 deletions
diff --git a/core/src/main/scala/org/apache/spark/Partition.scala b/core/src/main/scala/org/apache/spark/Partition.scala index dd3f28e419..e10660793d 100644 --- a/core/src/main/scala/org/apache/spark/Partition.scala +++ b/core/src/main/scala/org/apache/spark/Partition.scala @@ -28,4 +28,6 @@ trait Partition extends Serializable { // A better default implementation of HashCode override def hashCode(): Int = index + + override def equals(other: Any): Boolean = super.equals(other) } diff --git a/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala index 7bc1eb0436..2381f54ee3 100644 --- a/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala @@ -58,10 +58,10 @@ private[spark] case class NarrowCoGroupSplitDep( * narrowDeps should always be equal to the number of parents. */ private[spark] class CoGroupPartition( - idx: Int, val narrowDeps: Array[Option[NarrowCoGroupSplitDep]]) + override val index: Int, val narrowDeps: Array[Option[NarrowCoGroupSplitDep]]) extends Partition with Serializable { - override val index: Int = idx - override def hashCode(): Int = idx + override def hashCode(): Int = index + override def equals(other: Any): Boolean = super.equals(other) } /** diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala index 6b1e15572c..b22134af45 100644 --- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala @@ -53,14 +53,14 @@ import org.apache.spark.util.{NextIterator, SerializableConfiguration, ShutdownH /** * A Spark split class that wraps around a Hadoop InputSplit. 
*/ -private[spark] class HadoopPartition(rddId: Int, idx: Int, s: InputSplit) +private[spark] class HadoopPartition(rddId: Int, override val index: Int, s: InputSplit) extends Partition { val inputSplit = new SerializableWritable[InputSplit](s) - override def hashCode(): Int = 41 * (41 + rddId) + idx + override def hashCode(): Int = 31 * (31 + rddId) + index - override val index: Int = idx + override def equals(other: Any): Boolean = super.equals(other) /** * Get any environment variables that should be added to the users environment when running pipes diff --git a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala index a71c191b31..ad7c2216a0 100644 --- a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala @@ -45,7 +45,10 @@ private[spark] class NewHadoopPartition( extends Partition { val serializableHadoopSplit = new SerializableWritable(rawSplit) - override def hashCode(): Int = 41 * (41 + rddId) + index + + override def hashCode(): Int = 31 * (31 + rddId) + index + + override def equals(other: Any): Boolean = super.equals(other) } /** diff --git a/core/src/main/scala/org/apache/spark/rdd/PartitionerAwareUnionRDD.scala b/core/src/main/scala/org/apache/spark/rdd/PartitionerAwareUnionRDD.scala index 0abba15bec..b6366f3e68 100644 --- a/core/src/main/scala/org/apache/spark/rdd/PartitionerAwareUnionRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PartitionerAwareUnionRDD.scala @@ -31,12 +31,13 @@ import org.apache.spark.util.Utils private[spark] class PartitionerAwareUnionRDDPartition( @transient val rdds: Seq[RDD[_]], - val idx: Int + override val index: Int ) extends Partition { - var parents = rdds.map(_.partitions(idx)).toArray + var parents = rdds.map(_.partitions(index)).toArray - override val index = idx - override def hashCode(): Int = idx + override def hashCode(): Int = index + + override def equals(other: 
Any): Boolean = super.equals(other) @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { diff --git a/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala index 800b42505d..29d5d74650 100644 --- a/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala @@ -25,7 +25,10 @@ import org.apache.spark.serializer.Serializer private[spark] class ShuffledRDDPartition(val idx: Int) extends Partition { override val index: Int = idx - override def hashCode(): Int = idx + + override def hashCode(): Int = index + + override def equals(other: Any): Boolean = super.equals(other) } /** diff --git a/core/src/test/scala/org/apache/spark/scheduler/CustomShuffledRDD.scala b/core/src/test/scala/org/apache/spark/scheduler/CustomShuffledRDD.scala index d8d818ceed..8386869237 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/CustomShuffledRDD.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/CustomShuffledRDD.scala @@ -18,6 +18,7 @@ package org.apache.spark.scheduler import java.util.Arrays +import java.util.Objects import org.apache.spark._ import org.apache.spark.rdd.RDD @@ -53,6 +54,9 @@ class CoalescedPartitioner(val parent: Partitioner, val partitionStartIndices: A parentPartitionMapping(parent.getPartition(key)) } + override def hashCode(): Int = + 31 * Objects.hashCode(parent) + Arrays.hashCode(partitionStartIndices) + override def equals(other: Any): Boolean = other match { case c: CoalescedPartitioner => c.parent == parent && Arrays.equals(c.partitionStartIndices, partitionStartIndices) @@ -66,6 +70,8 @@ private[spark] class CustomShuffledRDDPartition( extends Partition { override def hashCode(): Int = index + + override def equals(other: Any): Boolean = super.equals(other) } /** diff --git a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala 
b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala index 27d063630b..57a8231200 100644 --- a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala +++ b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala @@ -476,6 +476,9 @@ object KryoTest { class ClassWithNoArgConstructor { var x: Int = 0 + + override def hashCode(): Int = x + override def equals(other: Any): Boolean = other match { case c: ClassWithNoArgConstructor => x == c.x case _ => false @@ -483,6 +486,8 @@ object KryoTest { } class ClassWithoutNoArgConstructor(val x: Int) { + override def hashCode(): Int = x + override def equals(other: Any): Boolean = other match { case c: ClassWithoutNoArgConstructor => x == c.x case _ => false diff --git a/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala b/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala index 932704c1a3..4920b7ee8b 100644 --- a/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala @@ -124,6 +124,8 @@ class ClosureCleanerSuite extends SparkFunSuite { // A non-serializable class we create in closures to make sure that we aren't // keeping references to unneeded variables from our outer closures. 
class NonSerializable(val id: Int = -1) { + override def hashCode(): Int = id + override def equals(other: Any): Boolean = { other match { case o: NonSerializable => id == o.id diff --git a/core/src/test/scala/org/apache/spark/util/collection/FixedHashObject.scala b/core/src/test/scala/org/apache/spark/util/collection/FixedHashObject.scala index c787b5f066..ea22db3555 100644 --- a/core/src/test/scala/org/apache/spark/util/collection/FixedHashObject.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/FixedHashObject.scala @@ -22,4 +22,8 @@ package org.apache.spark.util.collection */ case class FixedHashObject(v: Int, h: Int) extends Serializable { override def hashCode(): Int = h + override def equals(other: Any): Boolean = other match { + case that: FixedHashObject => v == that.v && h == that.h + case _ => false + } } |