author    Patrick Wendell <pwendell@gmail.com>    2014-06-12 15:43:32 -0700
committer Patrick Wendell <pwendell@gmail.com>    2014-06-12 15:43:32 -0700
commit    1c04652c8f18566baafb13dbae355f8ad2ad8d37 (patch)
tree      ea88c5147a602b2dfb8cfaa4de6c436716fac508 /bin/compute-classpath.sh
parent    ecde5b837534b11d365fcab78089820990b815cf (diff)
SPARK-1843: Replace assemble-deps with env variable.
(This change is actually small; it moves some logic into
compute-classpath.sh that was previously in spark-class.)
assemble-deps has existed for a while to let developers run
local code with new changes quickly. When I'm developing, I
typically use a simpler approach that just prepends the Spark
classes to the classpath ahead of the assembly jar. This is well
defined in the JVM, and the Spark classes take precedence over those
in the assembly.
This approach is portable across both builds, which is the main reason I'd
like to switch to it. It's also a bit easier to toggle on and off quickly.
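As a minimal sketch of why the prepend works (the paths and entry point here are illustrative, not the actual script): the JVM resolves classes from classpath entries left to right, so a class found in a local `target/` directory shadows the identical class packaged in the assembly jar.
```
# Illustrative sketch only, not compute-classpath.sh itself:
# classpath entries are searched left to right, so locally compiled
# classes placed first shadow the same classes inside the assembly jar.
CLASSPATH="$FWDIR/core/target/scala-$SCALA_VERSION/classes"  # fresh classes first
CLASSPATH="$CLASSPATH:$ASSEMBLY_JAR"                         # assembly jar last
java -cp "$CLASSPATH" org.apache.spark.repl.Main
```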
You use it as follows:
```
$ ./bin/spark-shell # Use spark with the normal assembly
$ export SPARK_PREPEND_CLASSES=true
$ ./bin/spark-shell # Now it's using compiled classes
$ unset SPARK_PREPEND_CLASSES
$ ./bin/spark-shell # Back to normal
```
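A quick way to confirm the toggle took effect is to inspect the classpath the script computes; compute-classpath.sh prints it to stdout (that is how spark-class consumes it), so something like the following filter works (the grep pattern is just one way to do it):
```
$ export SPARK_PREPEND_CLASSES=true
$ ./bin/compute-classpath.sh | tr ':' '\n' | grep "target/scala"
```
This is also why the new NOTE message in the patch is redirected to stderr (`>&2`): the script's stdout is reserved for the classpath itself.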
Author: Patrick Wendell <pwendell@gmail.com>
Closes #877 from pwendell/assemble-deps and squashes the following commits:
8a11345 [Patrick Wendell] Merge remote-tracking branch 'apache/master' into assemble-deps
faa3168 [Patrick Wendell] Adding a warning for compatibility
3f151a7 [Patrick Wendell] Small fix
bbfb73c [Patrick Wendell] Review feedback
328e9f8 [Patrick Wendell] SPARK-1843: Replace assemble-deps with env variable.
Diffstat (limited to 'bin/compute-classpath.sh')
-rwxr-xr-x  bin/compute-classpath.sh | 34
1 file changed, 25 insertions(+), 9 deletions(-)
```
diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh
index 7df43a555d..2cf4e381c1 100755
--- a/bin/compute-classpath.sh
+++ b/bin/compute-classpath.sh
@@ -38,8 +38,10 @@ else
   JAR_CMD="jar"
 fi
 
-# First check if we have a dependencies jar. If so, include binary classes with the deps jar
-if [ -f "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar ]; then
+# A developer option to prepend more recently compiled Spark classes
+if [ -n "$SPARK_PREPEND_CLASSES" ]; then
+  echo "NOTE: SPARK_PREPEND_CLASSES is set, placing locally compiled Spark"\
+    "classes ahead of assembly." >&2
   CLASSPATH="$CLASSPATH:$FWDIR/core/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/repl/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/mllib/target/scala-$SCALA_VERSION/classes"
@@ -51,17 +53,31 @@ if [ -f "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar ]; then
   CLASSPATH="$CLASSPATH:$FWDIR/sql/core/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/sql/hive/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/yarn/stable/target/scala-$SCALA_VERSION/classes"
+fi
 
-  ASSEMBLY_JAR=$(ls "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar 2>/dev/null)
+# Use spark-assembly jar from either RELEASE or assembly directory
+if [ -f "$FWDIR/RELEASE" ]; then
+  assembly_folder="$FWDIR"/lib
 else
-  # Else use spark-assembly jar from either RELEASE or assembly directory
-  if [ -f "$FWDIR/RELEASE" ]; then
-    ASSEMBLY_JAR=$(ls "$FWDIR"/lib/spark-assembly*hadoop*.jar 2>/dev/null)
-  else
-    ASSEMBLY_JAR=$(ls "$ASSEMBLY_DIR"/spark-assembly*hadoop*.jar 2>/dev/null)
-  fi
+  assembly_folder="$ASSEMBLY_DIR"
 fi
 
+num_jars=$(ls "$assembly_folder" | grep "spark-assembly.*hadoop.*\.jar" | wc -l)
+if [ "$num_jars" -eq "0" ]; then
+  echo "Failed to find Spark assembly in $assembly_folder"
+  echo "You need to build Spark before running this program."
+  exit 1
+fi
+if [ "$num_jars" -gt "1" ]; then
+  jars_list=$(ls "$assembly_folder" | grep "spark-assembly.*hadoop.*.jar")
+  echo "Found multiple Spark assembly jars in $assembly_folder:"
+  echo "$jars_list"
+  echo "Please remove all but one jar."
+  exit 1
+fi
+
+ASSEMBLY_JAR=$(ls "$assembly_folder"/spark-assembly*hadoop*.jar 2>/dev/null)
+
 # Verify that versions of java used to build the jars and run Spark are compatible
 jar_error_check=$("$JAR_CMD" -tf "$ASSEMBLY_JAR" nonexistent/class/path 2>&1)
 if [[ "$jar_error_check" =~ "invalid CEN header" ]]; then
```
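With the new guard, a missing or duplicated assembly now fails fast instead of producing a broken classpath. A hypothetical session (the directory shown is made up; the messages are the ones added in the patch above):
```
$ ./bin/spark-shell        # before building the assembly
Failed to find Spark assembly in /home/dev/spark/assembly/target/scala-2.10
You need to build Spark before running this program.
```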