path: root/project/MimaExcludes.scala
author     Reynold Xin <rxin@apache.org>    2014-07-17 10:54:53 -0700
committer  Reynold Xin <rxin@apache.org>    2014-07-17 10:54:53 -0700
commit     d988d345d5bec0668324386f3e81787f78e75e67
tree       47ad23ea5c1805950d32608322b9b1f0a485bc5e /project/MimaExcludes.scala
parent     9c73822a08848a0cde545282d3eb1c3f1a4c2a82
[SPARK-2534] Avoid pulling in the entire RDD in various operators
This should go into both master and branch-1.0.

Author: Reynold Xin <rxin@apache.org>

Closes #1450 from rxin/agg-closure and squashes the following commits:

e40f363 [Reynold Xin] Mima check excludes.
9186364 [Reynold Xin] Define the return type more explicitly.
38e348b [Reynold Xin] Fixed the cases in RDD.scala.
ea6b34d [Reynold Xin] Blah
89b9c43 [Reynold Xin] Fix other instances of accidentally pulling in extra stuff in closures.
73b2783 [Reynold Xin] [SPARK-2534] Avoid pulling in the entire RDD in groupByKey.
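The root cause is a Scala closure-capture pitfall. A local def is compiled to a lifted method on the enclosing class (which is why the mangled names excluded below end in $1), so eta-expanding it into a function value yields a closure that retains `this`, and with it the entire RDD. The patch replaces such defs with function literals bound to vals. A minimal sketch of the pattern with hypothetical names (not the actual Spark code), describing the behavior of the Scala 2.10 compiler Spark used at the time:

class BigHolder(val payload: Array[Byte]) extends Serializable {

  // Capturing: the local def is lifted to a private method transform$1 on
  // BigHolder, so `transform _` eta-expands to a closure that keeps a
  // reference to this instance; serializing it ships the whole payload.
  def capturingClosure: Int => Int = {
    def transform(x: Int) = x + 1
    transform _
  }

  // Self-contained: a function literal bound to a val references nothing from
  // the enclosing instance, so only the small function object is serialized.
  def selfContainedClosure: Int => Int = {
    val transform = (x: Int) => x + 1
    transform
  }
}

Converting a def to a val removes the lifted `...$1` method from the compiled class, which MiMa reports as a MissingMethodProblem even though nothing public changed; that is what the excludes in this file paper over.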
Diffstat (limited to 'project/MimaExcludes.scala')
-rw-r--r--  project/MimaExcludes.scala | 159
1 file changed, 87 insertions(+), 72 deletions(-)
diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala
index d67c6571a0..3487f7c5c1 100644
--- a/project/MimaExcludes.scala
+++ b/project/MimaExcludes.scala
@@ -31,76 +31,91 @@ import com.typesafe.tools.mima.core._
* MimaBuild.excludeSparkClass("graphx.util.collection.GraphXPrimitiveKeyOpenHashMap")
*/
object MimaExcludes {
- def excludes(version: String) =
- version match {
- case v if v.startsWith("1.1") =>
- Seq(
- MimaBuild.excludeSparkPackage("deploy"),
- MimaBuild.excludeSparkPackage("graphx")
- ) ++
- Seq(
- // Adding new method to JavaRDDLike trait - we should probably mark this as a developer API.
- ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.partitions"),
- // We made a mistake earlier (ed06500d3) in the Java API to use default parameter values
- // for countApproxDistinct* functions, which does not work in Java. We later removed
- // them, and use the following to tell Mima to not care about them.
- ProblemFilters.exclude[IncompatibleResultTypeProblem](
- "org.apache.spark.api.java.JavaPairRDD.countApproxDistinctByKey"),
- ProblemFilters.exclude[IncompatibleResultTypeProblem](
- "org.apache.spark.api.java.JavaPairRDD.countApproxDistinctByKey"),
- ProblemFilters.exclude[MissingMethodProblem](
- "org.apache.spark.api.java.JavaPairRDD.countApproxDistinct$default$1"),
- ProblemFilters.exclude[MissingMethodProblem](
- "org.apache.spark.api.java.JavaPairRDD.countApproxDistinctByKey$default$1"),
- ProblemFilters.exclude[MissingMethodProblem](
- "org.apache.spark.api.java.JavaRDD.countApproxDistinct$default$1"),
- ProblemFilters.exclude[MissingMethodProblem](
- "org.apache.spark.api.java.JavaRDDLike.countApproxDistinct$default$1"),
- ProblemFilters.exclude[MissingMethodProblem](
- "org.apache.spark.api.java.JavaDoubleRDD.countApproxDistinct$default$1"),
- ProblemFilters.exclude[MissingMethodProblem](
- "org.apache.spark.storage.MemoryStore.Entry"),
- ProblemFilters.exclude[MissingMethodProblem](
- "org.apache.spark.rdd.PairRDDFunctions.org$apache$spark$rdd$PairRDDFunctions$$"
- + "createZero$1")
- ) ++
- Seq(
- ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.flume.FlumeReceiver.this")
- ) ++
- Seq( // Ignore some private methods in ALS.
- ProblemFilters.exclude[MissingMethodProblem](
- "org.apache.spark.mllib.recommendation.ALS.org$apache$spark$mllib$recommendation$ALS$^dateFeatures"),
- ProblemFilters.exclude[MissingMethodProblem]( // The only public constructor is the one without arguments.
- "org.apache.spark.mllib.recommendation.ALS.this"),
- ProblemFilters.exclude[MissingMethodProblem](
- "org.apache.spark.mllib.recommendation.ALS.org$apache$spark$mllib$recommendation$ALS$$<init>$default$7")
- ) ++
- MimaBuild.excludeSparkClass("mllib.linalg.distributed.ColumnStatisticsAggregator") ++
- MimaBuild.excludeSparkClass("rdd.ZippedRDD") ++
- MimaBuild.excludeSparkClass("rdd.ZippedPartition") ++
- MimaBuild.excludeSparkClass("util.SerializableHyperLogLog") ++
- MimaBuild.excludeSparkClass("storage.Values") ++
- MimaBuild.excludeSparkClass("storage.Entry") ++
- MimaBuild.excludeSparkClass("storage.MemoryStore$Entry")
- case v if v.startsWith("1.0") =>
- Seq(
- MimaBuild.excludeSparkPackage("api.java"),
- MimaBuild.excludeSparkPackage("mllib"),
- MimaBuild.excludeSparkPackage("streaming")
- ) ++
- MimaBuild.excludeSparkClass("rdd.ClassTags") ++
- MimaBuild.excludeSparkClass("util.XORShiftRandom") ++
- MimaBuild.excludeSparkClass("graphx.EdgeRDD") ++
- MimaBuild.excludeSparkClass("graphx.VertexRDD") ++
- MimaBuild.excludeSparkClass("graphx.impl.GraphImpl") ++
- MimaBuild.excludeSparkClass("graphx.impl.RoutingTable") ++
- MimaBuild.excludeSparkClass("graphx.util.collection.PrimitiveKeyOpenHashMap") ++
- MimaBuild.excludeSparkClass("graphx.util.collection.GraphXPrimitiveKeyOpenHashMap") ++
- MimaBuild.excludeSparkClass("mllib.recommendation.MFDataGenerator") ++
- MimaBuild.excludeSparkClass("mllib.optimization.SquaredGradient") ++
- MimaBuild.excludeSparkClass("mllib.regression.RidgeRegressionWithSGD") ++
- MimaBuild.excludeSparkClass("mllib.regression.LassoWithSGD") ++
- MimaBuild.excludeSparkClass("mllib.regression.LinearRegressionWithSGD")
- case _ => Seq()
- }
+
+ def excludes(version: String) = version match {
+ case v if v.startsWith("1.1") =>
+ Seq(
+ MimaBuild.excludeSparkPackage("deploy"),
+ MimaBuild.excludeSparkPackage("graphx")
+ ) ++
+ closures.map(method => ProblemFilters.exclude[MissingMethodProblem](method)) ++
+ Seq(
+ // Adding new method to JavaRDDLike trait - we should probably mark this as a developer API.
+ ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.partitions"),
+ // We made a mistake earlier (ed06500d3) in the Java API to use default parameter values
+ // for countApproxDistinct* functions, which does not work in Java. We later removed
+ // them, and use the following to tell Mima to not care about them.
+ ProblemFilters.exclude[IncompatibleResultTypeProblem](
+ "org.apache.spark.api.java.JavaPairRDD.countApproxDistinctByKey"),
+ ProblemFilters.exclude[IncompatibleResultTypeProblem](
+ "org.apache.spark.api.java.JavaPairRDD.countApproxDistinctByKey"),
+ ProblemFilters.exclude[MissingMethodProblem](
+ "org.apache.spark.api.java.JavaPairRDD.countApproxDistinct$default$1"),
+ ProblemFilters.exclude[MissingMethodProblem](
+ "org.apache.spark.api.java.JavaPairRDD.countApproxDistinctByKey$default$1"),
+ ProblemFilters.exclude[MissingMethodProblem](
+ "org.apache.spark.api.java.JavaRDD.countApproxDistinct$default$1"),
+ ProblemFilters.exclude[MissingMethodProblem](
+ "org.apache.spark.api.java.JavaRDDLike.countApproxDistinct$default$1"),
+ ProblemFilters.exclude[MissingMethodProblem](
+ "org.apache.spark.api.java.JavaDoubleRDD.countApproxDistinct$default$1"),
+ ProblemFilters.exclude[MissingMethodProblem](
+ "org.apache.spark.storage.MemoryStore.Entry"),
+ ProblemFilters.exclude[MissingMethodProblem](
+ "org.apache.spark.rdd.PairRDDFunctions.org$apache$spark$rdd$PairRDDFunctions$$"
+ + "createZero$1")
+ ) ++
+ Seq(
+ ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.streaming.flume.FlumeReceiver.this")
+ ) ++
+ Seq( // Ignore some private methods in ALS.
+ ProblemFilters.exclude[MissingMethodProblem](
+ "org.apache.spark.mllib.recommendation.ALS.org$apache$spark$mllib$recommendation$ALS$^dateFeatures"),
+ ProblemFilters.exclude[MissingMethodProblem]( // The only public constructor is the one without arguments.
+ "org.apache.spark.mllib.recommendation.ALS.this"),
+ ProblemFilters.exclude[MissingMethodProblem](
+ "org.apache.spark.mllib.recommendation.ALS.org$apache$spark$mllib$recommendation$ALS$$<init>$default$7")
+ ) ++
+ MimaBuild.excludeSparkClass("mllib.linalg.distributed.ColumnStatisticsAggregator") ++
+ MimaBuild.excludeSparkClass("rdd.ZippedRDD") ++
+ MimaBuild.excludeSparkClass("rdd.ZippedPartition") ++
+ MimaBuild.excludeSparkClass("util.SerializableHyperLogLog") ++
+ MimaBuild.excludeSparkClass("storage.Values") ++
+ MimaBuild.excludeSparkClass("storage.Entry") ++
+ MimaBuild.excludeSparkClass("storage.MemoryStore$Entry")
+ case v if v.startsWith("1.0") =>
+ Seq(
+ MimaBuild.excludeSparkPackage("api.java"),
+ MimaBuild.excludeSparkPackage("mllib"),
+ MimaBuild.excludeSparkPackage("streaming")
+ ) ++
+ MimaBuild.excludeSparkClass("rdd.ClassTags") ++
+ MimaBuild.excludeSparkClass("util.XORShiftRandom") ++
+ MimaBuild.excludeSparkClass("graphx.EdgeRDD") ++
+ MimaBuild.excludeSparkClass("graphx.VertexRDD") ++
+ MimaBuild.excludeSparkClass("graphx.impl.GraphImpl") ++
+ MimaBuild.excludeSparkClass("graphx.impl.RoutingTable") ++
+ MimaBuild.excludeSparkClass("graphx.util.collection.PrimitiveKeyOpenHashMap") ++
+ MimaBuild.excludeSparkClass("graphx.util.collection.GraphXPrimitiveKeyOpenHashMap") ++
+ MimaBuild.excludeSparkClass("mllib.recommendation.MFDataGenerator") ++
+ MimaBuild.excludeSparkClass("mllib.optimization.SquaredGradient") ++
+ MimaBuild.excludeSparkClass("mllib.regression.RidgeRegressionWithSGD") ++
+ MimaBuild.excludeSparkClass("mllib.regression.LassoWithSGD") ++
+ MimaBuild.excludeSparkClass("mllib.regression.LinearRegressionWithSGD")
+ case _ => Seq()
+ }
+
+ private val closures = Seq(
+ "org.apache.spark.rdd.RDD.org$apache$spark$rdd$RDD$$mergeMaps$1",
+ "org.apache.spark.rdd.RDD.org$apache$spark$rdd$RDD$$countPartition$1",
+ "org.apache.spark.rdd.RDD.org$apache$spark$rdd$RDD$$distributePartition$1",
+ "org.apache.spark.rdd.PairRDDFunctions.org$apache$spark$rdd$PairRDDFunctions$$mergeValue$1",
+ "org.apache.spark.rdd.PairRDDFunctions.org$apache$spark$rdd$PairRDDFunctions$$writeToFile$1",
+ "org.apache.spark.rdd.PairRDDFunctions.org$apache$spark$rdd$PairRDDFunctions$$reducePartition$1",
+ "org.apache.spark.rdd.PairRDDFunctions.org$apache$spark$rdd$PairRDDFunctions$$writeShard$1",
+ "org.apache.spark.rdd.PairRDDFunctions.org$apache$spark$rdd$PairRDDFunctions$$mergeCombiners$1",
+ "org.apache.spark.rdd.PairRDDFunctions.org$apache$spark$rdd$PairRDDFunctions$$process$1",
+ "org.apache.spark.rdd.PairRDDFunctions.org$apache$spark$rdd$PairRDDFunctions$$createCombiner$1",
+ "org.apache.spark.rdd.PairRDDFunctions.org$apache$spark$rdd$PairRDDFunctions$$mergeMaps$1"
+ )
}
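Past the mechanical re-indentation, the rewrite deduplicates the new filters: the lifted closure methods this patch removes are listed once in the private `closures` val and mapped into MissingMethodProblem exclusions in one expression. The same pattern works for any batch of known-benign removals. A self-contained sketch with hypothetical class and method names, using MiMa's ProblemFilters API:

import com.typesafe.tools.mima.core._

object ExampleExcludes {
  // Hypothetical lifted closures deleted by a refactor; each one would
  // otherwise surface as a MissingMethodProblem in the compatibility report.
  private val removedClosures = Seq(
    "org.example.Tables.org$example$Tables$$mergeRows$1",
    "org.example.Tables.org$example$Tables$$splitRows$1"
  )

  // One map call replaces a separate ProblemFilters.exclude[...] line per entry.
  val filters = removedClosures.map(name =>
    ProblemFilters.exclude[MissingMethodProblem](name))
}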