author    Josh Rosen <joshrosen@databricks.com>    2016-03-10 23:28:34 -0800
committer Josh Rosen <joshrosen@databricks.com>    2016-03-10 23:28:34 -0800
commit    6ca990fb366cf68cd9d5afb433725d28f07e51a0 (patch)
tree      036d4b8afc9f8f14f95e5f31e92cabcd51850242 /dev
parent    d18276cb1d82790a402960835e112aebd0c55513 (diff)
[SPARK-13294][PROJECT INFRA] Remove MiMa's dependency on spark-class / Spark assembly
This patch removes the need to build a full Spark assembly before running the `dev/mima` script.

- I modified the `tools` project to remove a direct dependency on Spark, so `sbt/sbt tools/fullClasspath` will now return the classpath for the `GenerateMIMAIgnore` class itself plus its own dependencies.
  - This required me to delete two classes full of dead code that we don't use anymore.
- `GenerateMIMAIgnore` now uses [ClassUtil](http://software.clapper.org/classutil/) to find all of the Spark classes rather than our homemade JAR traversal code. The problem in our own code was that it didn't handle folders of classes properly, which is necessary in order to generate excludes with an assembly-free Spark build.
- `./dev/mima` no longer runs through `spark-class`, eliminating the need to reason about classpath ordering between `SPARK_CLASSPATH` and the assembly.

Author: Josh Rosen <joshrosen@databricks.com>

Closes #11178 from JoshRosen/remove-assembly-in-run-tests.
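For context, a minimal sketch of the ClassUtil-based discovery described above. The object name and package filter are illustrative, not the actual `GenerateMIMAIgnore` code:

```scala
import org.clapper.classutil.ClassFinder

// Illustrative only: enumerate Spark classes from whatever is on the
// JVM classpath. ClassFinder() with no arguments scans java.class.path,
// and it handles both JAR files and directories of .class files --
// the latter being the case the homemade JAR traversal code missed.
object ListSparkClasses {
  def main(args: Array[String]): Unit = {
    val finder = ClassFinder()
    finder.getClasses
      .map(_.name)
      .filter(_.startsWith("org.apache.spark"))
      .foreach(println)
  }
}
```

Run with the classpath that `build/sbt "export tools/fullClasspath"` produces, a tool like this sees exactly the classes the MiMa excludes need to cover, assembly or no assembly.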
Diffstat (limited to 'dev')
-rwxr-xr-x  dev/mima         |  23
-rwxr-xr-x  dev/run-tests.py |  24
2 files changed, 28 insertions, 19 deletions
diff --git a/dev/mima b/dev/mima
index d5baffc6ef..b7f8d62b7d 100755
--- a/dev/mima
+++ b/dev/mima
@@ -24,24 +24,21 @@ set -e
FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
cd "$FWDIR"
-echo -e "q\n" | build/sbt oldDeps/update
+TOOLS_CLASSPATH="$(build/sbt "export tools/fullClasspath" | tail -n1)"
+
rm -f .generated-mima*
generate_mima_ignore() {
- SPARK_JAVA_OPTS="-XX:MaxPermSize=1g -Xmx2g" \
- ./bin/spark-class org.apache.spark.tools.GenerateMIMAIgnore
+ java \
+ -XX:MaxPermSize=1g \
+ -Xmx2g \
+ -cp "$TOOLS_CLASSPATH:$1" \
+ org.apache.spark.tools.GenerateMIMAIgnore
}
-# Generate Mima Ignore is called twice, first with latest built jars
-# on the classpath and then again with previous version jars on the classpath.
-# Because of a bug in GenerateMIMAIgnore that when old jars are ahead on classpath
-# it did not process the new classes (which are in assembly jar).
-generate_mima_ignore
-
-export SPARK_CLASSPATH="$(build/sbt "export oldDeps/fullClasspath" | tail -n1)"
-echo "SPARK_CLASSPATH=$SPARK_CLASSPATH"
-
-generate_mima_ignore
+SPARK_PROFILES="-Pyarn -Pspark-ganglia-lgpl -Pkinesis-asl -Phive-thriftserver -Phive"
+generate_mima_ignore "$(build/sbt $SPARK_PROFILES "export assembly/fullClasspath" | tail -n1)"
+generate_mima_ignore "$(build/sbt $SPARK_PROFILES "export oldDeps/fullClasspath" | tail -n1)"
echo -e "q\n" | build/sbt mima-report-binary-issues | grep -v -e "info.*Resolving"
ret_val=$?
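The `TOOLS_CLASSPATH` line above works because sbt's `export` command prints the resolved classpath as the last line of its output, hence the `tail -n1`. A minimal build-definition sketch of how the `tools` subproject can carry its dependencies without depending on Spark itself; the ClassUtil version shown is an assumption, not taken from this commit:

```scala
// build.sbt sketch (illustrative; version number assumed): because
// tools no longer depends on Spark, resolving
// `build/sbt "export tools/fullClasspath"` does not require a Spark
// assembly to exist.
lazy val tools = (project in file("tools"))
  .settings(
    libraryDependencies += "org.clapper" %% "classutil" % "1.0.6"
  )
```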
diff --git a/dev/run-tests.py b/dev/run-tests.py
index 6e45113134..ebeede52c9 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -336,7 +336,6 @@ def build_spark_sbt(hadoop_version):
# Enable all of the profiles for the build:
build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
sbt_goals = ["package",
- "assembly/assembly",
"streaming-kafka-assembly/assembly",
"streaming-flume-assembly/assembly",
"streaming-mqtt-assembly/assembly",
@@ -350,6 +349,16 @@ def build_spark_sbt(hadoop_version):
exec_sbt(profiles_and_goals)
+def build_spark_assembly_sbt(hadoop_version):
+ # Enable all of the profiles for the build:
+ build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags
+ sbt_goals = ["assembly/assembly"]
+ profiles_and_goals = build_profiles + sbt_goals
+ print("[info] Building Spark assembly (w/Hive 1.2.1) using SBT with these arguments: ",
+ " ".join(profiles_and_goals))
+ exec_sbt(profiles_and_goals)
+
+
def build_apache_spark(build_tool, hadoop_version):
"""Will build Spark against Hive v1.2.1 given the passed in build tool (either `sbt` or
`maven`). Defaults to using `sbt`."""
@@ -561,11 +570,14 @@ def main():
# spark build
build_apache_spark(build_tool, hadoop_version)
- # TODO Temporarily disable MiMA check for DF-to-DS migration prototyping
- # # backwards compatibility checks
- # if build_tool == "sbt":
- # # Note: compatiblity tests only supported in sbt for now
- # detect_binary_inop_with_mima()
+ # backwards compatibility checks
+ if build_tool == "sbt":
+ # Note: compatibility tests only supported in sbt for now
+ # TODO Temporarily disable MiMA check for DF-to-DS migration prototyping
+ # detect_binary_inop_with_mima()
+ # Since we did not build assembly/assembly before running dev/mima, we need to
+ # do it here because the tests still rely on it; see SPARK-13294 for details.
+ build_spark_assembly_sbt(hadoop_version)
# run the test suites
run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags)
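For reference, the `mima-report-binary-issues` task that `dev/mima` invokes comes from the sbt MiMa plugin. A rough sketch of the settings it consumes, using the plugin's pre-1.0 key names and an assumed previous-version coordinate; Spark's real configuration lives in `project/MimaBuild.scala` and `project/MimaExcludes.scala`:

```scala
import com.typesafe.tools.mima.plugin.MimaKeys.{binaryIssueFilters, previousArtifact}
import com.typesafe.tools.mima.plugin.MimaPlugin.mimaDefaultSettings

// Sketch only: compare the current build against an assumed previous
// release. The excludes generated by GenerateMIMAIgnore are applied
// separately; hand-written excludes would extend binaryIssueFilters.
lazy val mimaSettings = mimaDefaultSettings ++ Seq(
  previousArtifact := Some("org.apache.spark" % "spark-core_2.11" % "1.6.0"),
  binaryIssueFilters ++= Seq.empty
)
```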