author     Joan <joan@goyeau.com>          2016-04-22 12:24:12 +0100
committer  Sean Owen <sowen@cloudera.com>  2016-04-22 12:24:12 +0100
commit     bf95b8da2774620cd62fa36bd8bf37725ad3fc7d (patch)
tree       b257a13641f72ed5b0b0eff34ef0bf64374c7c1d /core
parent     e09ab5da8b02da98d7b2496d549c1d53cceb8728 (diff)
[SPARK-6429] Implement hashCode and equals together
## What changes were proposed in this pull request?

Implement `hashCode` and `equals` together in order to enable the corresponding scalastyle rule. This is a first batch; I will continue to implement them, but I wanted to know your thoughts.

Author: Joan <joan@goyeau.com>

Closes #12157 from joan38/SPARK-6429-HashCode-Equals.
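For context, the scalastyle rule in question enforces the `java.lang.Object` contract: if `o1.equals(o2)` then `o1.hashCode == o2.hashCode`, so the two methods should always be defined together. A minimal sketch of the pattern this patch applies (the `Point` class here is hypothetical, not part of the change):

```scala
// Hypothetical example of defining equals and hashCode together.
class Point(val x: Int, val y: Int) {
  // Structural equality: two Points are equal iff both coordinates match.
  override def equals(other: Any): Boolean = other match {
    case p: Point => x == p.x && y == p.y
    case _ => false
  }

  // Derived from the same fields as equals, so equal Points hash alike.
  override def hashCode(): Int = 31 * x + y
}
```

Defining only one of the two is what the rule flags, since it silently breaks hash-based collections such as `HashMap` and `HashSet`.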
Diffstat (limited to 'core')
-rw-r--r--  core/src/main/scala/org/apache/spark/Partition.scala                       | 2
-rw-r--r--  core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala                | 6
-rw-r--r--  core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala                   | 6
-rw-r--r--  core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala                | 5
-rw-r--r--  core/src/main/scala/org/apache/spark/rdd/PartitionerAwareUnionRDD.scala    | 9
-rw-r--r--  core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala                 | 5
-rw-r--r--  core/src/test/scala/org/apache/spark/scheduler/CustomShuffledRDD.scala     | 6
-rw-r--r--  core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala  | 5
-rw-r--r--  core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala        | 2
-rw-r--r--  core/src/test/scala/org/apache/spark/util/collection/FixedHashObject.scala | 4
10 files changed, 38 insertions(+), 12 deletions(-)
diff --git a/core/src/main/scala/org/apache/spark/Partition.scala b/core/src/main/scala/org/apache/spark/Partition.scala
index dd3f28e419..e10660793d 100644
--- a/core/src/main/scala/org/apache/spark/Partition.scala
+++ b/core/src/main/scala/org/apache/spark/Partition.scala
@@ -28,4 +28,6 @@ trait Partition extends Serializable {
// A better default implementation of HashCode
override def hashCode(): Int = index
+
+ override def equals(other: Any): Boolean = super.equals(other)
}
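An aside on the default above: `super.equals(other)` resolves to `AnyRef.equals`, i.e. reference equality, so the change merely spells out the existing behaviour to satisfy the rule. A self-contained sketch (mirroring the trait locally rather than depending on spark-core):

```scala
// Standalone mirror of the Partition defaults above, so the snippet runs alone.
trait IndexedPart extends Serializable {
  def index: Int
  override def hashCode(): Int = index
  override def equals(other: Any): Boolean = super.equals(other) // reference equality
}

object PartitionDefaultsDemo extends App {
  val a = new IndexedPart { def index = 0 }
  val b = new IndexedPart { def index = 0 }
  assert(a.hashCode == b.hashCode) // same index => same hash: a legal collision
  assert(a != b)                   // but still two distinct partitions
  assert(a == a)                   // equal references hash alike, so the contract holds
}
```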
diff --git a/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala
index 7bc1eb0436..2381f54ee3 100644
--- a/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala
@@ -58,10 +58,10 @@ private[spark] case class NarrowCoGroupSplitDep(
* narrowDeps should always be equal to the number of parents.
*/
private[spark] class CoGroupPartition(
- idx: Int, val narrowDeps: Array[Option[NarrowCoGroupSplitDep]])
+ override val index: Int, val narrowDeps: Array[Option[NarrowCoGroupSplitDep]])
extends Partition with Serializable {
- override val index: Int = idx
- override def hashCode(): Int = idx
+ override def hashCode(): Int = index
+ override def equals(other: Any): Boolean = super.equals(other)
}
/**
diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
index 6b1e15572c..b22134af45 100644
--- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
@@ -53,14 +53,14 @@ import org.apache.spark.util.{NextIterator, SerializableConfiguration, ShutdownH
/**
* A Spark split class that wraps around a Hadoop InputSplit.
*/
-private[spark] class HadoopPartition(rddId: Int, idx: Int, s: InputSplit)
+private[spark] class HadoopPartition(rddId: Int, override val index: Int, s: InputSplit)
extends Partition {
val inputSplit = new SerializableWritable[InputSplit](s)
- override def hashCode(): Int = 41 * (41 + rddId) + idx
+ override def hashCode(): Int = 31 * (31 + rddId) + index
- override val index: Int = idx
+ override def equals(other: Any): Boolean = super.equals(other)
/**
* Get any environment variables that should be added to the users environment when running pipes
diff --git a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala
index a71c191b31..ad7c2216a0 100644
--- a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala
@@ -45,7 +45,10 @@ private[spark] class NewHadoopPartition(
extends Partition {
val serializableHadoopSplit = new SerializableWritable(rawSplit)
- override def hashCode(): Int = 41 * (41 + rddId) + index
+
+ override def hashCode(): Int = 31 * (31 + rddId) + index
+
+ override def equals(other: Any): Boolean = super.equals(other)
}
/**
diff --git a/core/src/main/scala/org/apache/spark/rdd/PartitionerAwareUnionRDD.scala b/core/src/main/scala/org/apache/spark/rdd/PartitionerAwareUnionRDD.scala
index 0abba15bec..b6366f3e68 100644
--- a/core/src/main/scala/org/apache/spark/rdd/PartitionerAwareUnionRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/PartitionerAwareUnionRDD.scala
@@ -31,12 +31,13 @@ import org.apache.spark.util.Utils
private[spark]
class PartitionerAwareUnionRDDPartition(
@transient val rdds: Seq[RDD[_]],
- val idx: Int
+ override val index: Int
) extends Partition {
- var parents = rdds.map(_.partitions(idx)).toArray
+ var parents = rdds.map(_.partitions(index)).toArray
- override val index = idx
- override def hashCode(): Int = idx
+ override def hashCode(): Int = index
+
+ override def equals(other: Any): Boolean = super.equals(other)
@throws(classOf[IOException])
private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException {
diff --git a/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala
index 800b42505d..29d5d74650 100644
--- a/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala
@@ -25,7 +25,10 @@ import org.apache.spark.serializer.Serializer
private[spark] class ShuffledRDDPartition(val idx: Int) extends Partition {
override val index: Int = idx
- override def hashCode(): Int = idx
+
+ override def hashCode(): Int = index
+
+ override def equals(other: Any): Boolean = super.equals(other)
}
/**
diff --git a/core/src/test/scala/org/apache/spark/scheduler/CustomShuffledRDD.scala b/core/src/test/scala/org/apache/spark/scheduler/CustomShuffledRDD.scala
index d8d818ceed..8386869237 100644
--- a/core/src/test/scala/org/apache/spark/scheduler/CustomShuffledRDD.scala
+++ b/core/src/test/scala/org/apache/spark/scheduler/CustomShuffledRDD.scala
@@ -18,6 +18,7 @@
package org.apache.spark.scheduler
import java.util.Arrays
+import java.util.Objects
import org.apache.spark._
import org.apache.spark.rdd.RDD
@@ -53,6 +54,9 @@ class CoalescedPartitioner(val parent: Partitioner, val partitionStartIndices: A
parentPartitionMapping(parent.getPartition(key))
}
+ override def hashCode(): Int =
+ 31 * Objects.hashCode(parent) + Arrays.hashCode(partitionStartIndices)
+
override def equals(other: Any): Boolean = other match {
case c: CoalescedPartitioner =>
c.parent == parent && Arrays.equals(c.partitionStartIndices, partitionStartIndices)
@@ -66,6 +70,8 @@ private[spark] class CustomShuffledRDDPartition(
extends Partition {
override def hashCode(): Int = index
+
+ override def equals(other: Any): Boolean = super.equals(other)
}
/**
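Worth noting in `CoalescedPartitioner` above: JVM arrays inherit reference-based `equals` and `hashCode`, which is why both methods delegate to `java.util.Arrays` for `partitionStartIndices`. A quick illustration:

```scala
import java.util.Arrays

object ArraySemanticsDemo extends App {
  val xs = Array(1, 2, 3)
  val ys = Array(1, 2, 3)
  assert(xs != ys)                 // arrays compare by reference on the JVM
  assert(Arrays.equals(xs, ys))    // element-wise comparison instead
  assert(Arrays.hashCode(xs) == Arrays.hashCode(ys)) // content-based hash to match
}
```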
diff --git a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala
index 27d063630b..57a8231200 100644
--- a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala
@@ -476,6 +476,9 @@ object KryoTest {
class ClassWithNoArgConstructor {
var x: Int = 0
+
+ override def hashCode(): Int = x
+
override def equals(other: Any): Boolean = other match {
case c: ClassWithNoArgConstructor => x == c.x
case _ => false
@@ -483,6 +486,8 @@ object KryoTest {
}
class ClassWithoutNoArgConstructor(val x: Int) {
+ override def hashCode(): Int = x
+
override def equals(other: Any): Boolean = other match {
case c: ClassWithoutNoArgConstructor => x == c.x
case _ => false
diff --git a/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala b/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala
index 932704c1a3..4920b7ee8b 100644
--- a/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala
@@ -124,6 +124,8 @@ class ClosureCleanerSuite extends SparkFunSuite {
// A non-serializable class we create in closures to make sure that we aren't
// keeping references to unneeded variables from our outer closures.
class NonSerializable(val id: Int = -1) {
+ override def hashCode(): Int = id
+
override def equals(other: Any): Boolean = {
other match {
case o: NonSerializable => id == o.id
diff --git a/core/src/test/scala/org/apache/spark/util/collection/FixedHashObject.scala b/core/src/test/scala/org/apache/spark/util/collection/FixedHashObject.scala
index c787b5f066..ea22db3555 100644
--- a/core/src/test/scala/org/apache/spark/util/collection/FixedHashObject.scala
+++ b/core/src/test/scala/org/apache/spark/util/collection/FixedHashObject.scala
@@ -22,4 +22,8 @@ package org.apache.spark.util.collection
*/
case class FixedHashObject(v: Int, h: Int) extends Serializable {
override def hashCode(): Int = h
+ override def equals(other: Any): Boolean = other match {
+ case that: FixedHashObject => v == that.v && h == that.h
+ case _ => false
+ }
}
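Finally, `FixedHashObject` exists to force hash collisions in the collection tests; since it is a case class, the added `equals` spells out the structural equality the compiler already generates, so the rule's requirement that both methods appear together is met without changing behaviour. A hedged usage sketch (the class is re-declared locally so the snippet is self-contained):

```scala
import scala.collection.mutable

// Local mirror of FixedHashObject: every instance hashes to the fixed h.
case class FixedHash(v: Int, h: Int) {
  override def hashCode(): Int = h
  override def equals(other: Any): Boolean = other match {
    case that: FixedHash => v == that.v && h == that.h
    case _ => false
  }
}

object CollisionDemo extends App {
  // Both keys land in the same bucket (hash 42)...
  val set = mutable.HashSet(FixedHash(1, 42), FixedHash(2, 42))
  // ...but structural equals keeps them distinct entries.
  assert(set.size == 2)
  assert(set.contains(FixedHash(1, 42)))
}
```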