aboutsummaryrefslogtreecommitdiff
path: root/project
diff options
context:
space:
mode:
author: Reynold Xin <rxin@apache.org> 2014-06-03 18:37:40 -0700
committer: Xiangrui Meng <meng@databricks.com> 2014-06-03 18:37:40 -0700
commit: 1faef149f763f4a54aaa6e17043d0a628ae338a0 (patch)
tree: 620cce43de43aca6df401bd910e259f335ae22eb /project
parent: 21e40ed88bf2c205c3d7f947fde5d5a6f3e29f7f (diff)
download: spark-1faef149f763f4a54aaa6e17043d0a628ae338a0.tar.gz
spark-1faef149f763f4a54aaa6e17043d0a628ae338a0.tar.bz2
spark-1faef149f763f4a54aaa6e17043d0a628ae338a0.zip
SPARK-1941: Update streamlib to 2.7.0 and use HyperLogLogPlus instead of HyperLogLog.
I also corrected some errors made in the previous HLL count approximate API, including that relativeSD wasn't really a measure of error (and we used it to test error bounds in test results). Author: Reynold Xin <rxin@apache.org> Closes #897 from rxin/hll and squashes the following commits: 4d83f41 [Reynold Xin] New error bound and non-randomness. f154ea0 [Reynold Xin] Added a comment on the value bound for testing. e367527 [Reynold Xin] One more round of code review. 41e649a [Reynold Xin] Update final mima list. 9e320c8 [Reynold Xin] Incorporate code review feedback. e110d70 [Reynold Xin] Merge branch 'master' into hll 354deb8 [Reynold Xin] Added comment on the Mima exclude rules. acaa524 [Reynold Xin] Added the right exclude rules in MimaExcludes. 6555bfe [Reynold Xin] Added a default method and re-arranged MimaExcludes. 1db1522 [Reynold Xin] Excluded util.SerializableHyperLogLog from MIMA check. 9221b27 [Reynold Xin] Merge branch 'master' into hll 88cfe77 [Reynold Xin] Updated documentation and restored the old incorrect API to maintain API compatibility. 1294be6 [Reynold Xin] Updated HLL+. e7786cb [Reynold Xin] Merge branch 'master' into hll c0ef0c2 [Reynold Xin] SPARK-1941: Update streamlib to 2.7.0 and use HyperLogLogPlus instead of HyperLogLog.
Diffstat (limited to 'project')
-rw-r--r-- project/MimaExcludes.scala 22
-rw-r--r-- project/SparkBuild.scala 2
2 files changed, 21 insertions, 3 deletions
diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala
index fc9cbeaec6..fadf6a4d8b 100644
--- a/project/MimaExcludes.scala
+++ b/project/MimaExcludes.scala
@@ -16,7 +16,6 @@
*/
import com.typesafe.tools.mima.core._
-import com.typesafe.tools.mima.core.ProblemFilters._
/**
* Additional excludes for checking of Spark's binary compatibility.
@@ -35,8 +34,27 @@ object MimaExcludes {
val excludes =
SparkBuild.SPARK_VERSION match {
case v if v.startsWith("1.1") =>
+ Seq(MimaBuild.excludeSparkPackage("graphx")) ++
Seq(
- MimaBuild.excludeSparkPackage("graphx"))
+ // We made a mistake earlier (ed06500d3) in the Java API to use default parameter values
+ // for countApproxDistinct* functions, which does not work in Java. We later removed
+ // them, and use the following to tell Mima to not care about them.
+ ProblemFilters.exclude[IncompatibleResultTypeProblem](
+ "org.apache.spark.api.java.JavaPairRDD.countApproxDistinctByKey"),
+ ProblemFilters.exclude[IncompatibleResultTypeProblem](
+ "org.apache.spark.api.java.JavaPairRDD.countApproxDistinctByKey"),
+ ProblemFilters.exclude[MissingMethodProblem](
+ "org.apache.spark.api.java.JavaPairRDD.countApproxDistinct$default$1"),
+ ProblemFilters.exclude[MissingMethodProblem](
+ "org.apache.spark.api.java.JavaPairRDD.countApproxDistinctByKey$default$1"),
+ ProblemFilters.exclude[MissingMethodProblem](
+ "org.apache.spark.api.java.JavaRDD.countApproxDistinct$default$1"),
+ ProblemFilters.exclude[MissingMethodProblem](
+ "org.apache.spark.api.java.JavaRDDLike.countApproxDistinct$default$1"),
+ ProblemFilters.exclude[MissingMethodProblem](
+ "org.apache.spark.api.java.JavaDoubleRDD.countApproxDistinct$default$1")
+ ) ++
+ MimaBuild.excludeSparkClass("util.SerializableHyperLogLog")
case v if v.startsWith("1.0") =>
Seq(
MimaBuild.excludeSparkPackage("api.java"),
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index c2a20d86b2..efb0b9319b 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -362,7 +362,7 @@ object SparkBuild extends Build {
"com.twitter" %% "chill" % chillVersion excludeAll(excludeAsm),
"com.twitter" % "chill-java" % chillVersion excludeAll(excludeAsm),
"org.tachyonproject" % "tachyon" % "0.4.1-thrift" excludeAll(excludeHadoop, excludeCurator, excludeEclipseJetty, excludePowermock),
- "com.clearspring.analytics" % "stream" % "2.5.1" excludeAll(excludeFastutil),
+ "com.clearspring.analytics" % "stream" % "2.7.0" excludeAll(excludeFastutil), // Only HyperLogLogPlus is used, which does not depend on fastutil.
"org.spark-project" % "pyrolite" % "2.0.1",
"net.sf.py4j" % "py4j" % "0.8.1"
),