-rwxr-xr-x  bin/compute-classpath.sh                                  | 34
-rwxr-xr-x  bin/spark-class                                           | 17
-rw-r--r--  core/src/main/scala/org/apache/spark/SparkContext.scala   |  3
-rw-r--r--  project/SparkBuild.scala                                  | 16
4 files changed, 40 insertions(+), 30 deletions(-)
diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh
index 7df43a555d..2cf4e381c1 100755
--- a/bin/compute-classpath.sh
+++ b/bin/compute-classpath.sh
@@ -38,8 +38,10 @@ else
JAR_CMD="jar"
fi
-# First check if we have a dependencies jar. If so, include binary classes with the deps jar
-if [ -f "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar ]; then
+# A developer option to prepend more recently compiled Spark classes
+if [ -n "$SPARK_PREPEND_CLASSES" ]; then
+ echo "NOTE: SPARK_PREPEND_CLASSES is set, placing locally compiled Spark"\
+ "classes ahead of assembly." >&2
CLASSPATH="$CLASSPATH:$FWDIR/core/target/scala-$SCALA_VERSION/classes"
CLASSPATH="$CLASSPATH:$FWDIR/repl/target/scala-$SCALA_VERSION/classes"
CLASSPATH="$CLASSPATH:$FWDIR/mllib/target/scala-$SCALA_VERSION/classes"
@@ -51,17 +53,31 @@ if [ -f "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar ]; then
CLASSPATH="$CLASSPATH:$FWDIR/sql/core/target/scala-$SCALA_VERSION/classes"
CLASSPATH="$CLASSPATH:$FWDIR/sql/hive/target/scala-$SCALA_VERSION/classes"
CLASSPATH="$CLASSPATH:$FWDIR/yarn/stable/target/scala-$SCALA_VERSION/classes"
+fi
- ASSEMBLY_JAR=$(ls "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar 2>/dev/null)
+# Use spark-assembly jar from either RELEASE or assembly directory
+if [ -f "$FWDIR/RELEASE" ]; then
+ assembly_folder="$FWDIR"/lib
else
- # Else use spark-assembly jar from either RELEASE or assembly directory
- if [ -f "$FWDIR/RELEASE" ]; then
- ASSEMBLY_JAR=$(ls "$FWDIR"/lib/spark-assembly*hadoop*.jar 2>/dev/null)
- else
- ASSEMBLY_JAR=$(ls "$ASSEMBLY_DIR"/spark-assembly*hadoop*.jar 2>/dev/null)
- fi
+ assembly_folder="$ASSEMBLY_DIR"
fi
+num_jars=$(ls "$assembly_folder" | grep "spark-assembly.*hadoop.*\.jar" | wc -l)
+if [ "$num_jars" -eq "0" ]; then
+ echo "Failed to find Spark assembly in $assembly_folder"
+ echo "You need to build Spark before running this program."
+ exit 1
+fi
+if [ "$num_jars" -gt "1" ]; then
+ jars_list=$(ls "$assembly_folder" | grep "spark-assembly.*hadoop.*\.jar")
+ echo "Found multiple Spark assembly jars in $assembly_folder:" >&2
+ echo "$jars_list" >&2
+ echo "Please remove all but one jar." >&2
+ exit 1
+fi
+
+ASSEMBLY_JAR=$(ls "$assembly_folder"/spark-assembly*hadoop*.jar 2>/dev/null)
+
# Verify that versions of java used to build the jars and run Spark are compatible
jar_error_check=$("$JAR_CMD" -tf "$ASSEMBLY_JAR" nonexistent/class/path 2>&1)
if [[ "$jar_error_check" =~ "invalid CEN header" ]]; then
diff --git a/bin/spark-class b/bin/spark-class
index e884511010..cfe363a71d 100755
--- a/bin/spark-class
+++ b/bin/spark-class
@@ -108,23 +108,6 @@ fi
export JAVA_OPTS
# Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in CommandUtils.scala!
-if [ ! -f "$FWDIR/RELEASE" ]; then
- # Exit if the user hasn't compiled Spark
- num_jars=$(ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/ | grep "spark-assembly.*hadoop.*.jar" | wc -l)
- jars_list=$(ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/ | grep "spark-assembly.*hadoop.*.jar")
- if [ "$num_jars" -eq "0" ]; then
- echo "Failed to find Spark assembly in $FWDIR/assembly/target/scala-$SCALA_VERSION/" >&2
- echo "You need to build Spark before running this program." >&2
- exit 1
- fi
- if [ "$num_jars" -gt "1" ]; then
- echo "Found multiple Spark assembly jars in $FWDIR/assembly/target/scala-$SCALA_VERSION:" >&2
- echo "$jars_list"
- echo "Please remove all but one jar."
- exit 1
- fi
-fi
-
TOOLS_DIR="$FWDIR"/tools
SPARK_TOOLS_JAR=""
if [ -e "$TOOLS_DIR"/target/scala-$SCALA_VERSION/*assembly*[0-9Tg].jar ]; then
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index df15186195..8fbda2c667 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -290,6 +290,9 @@ class SparkContext(config: SparkConf) extends Logging {
value <- Option(System.getenv(envKey)).orElse(Option(System.getProperty(propKey)))} {
executorEnvs(envKey) = value
}
+ Option(System.getenv("SPARK_PREPEND_CLASSES")).foreach { v =>
+ executorEnvs("SPARK_PREPEND_CLASSES") = v
+ }
// The Mesos scheduler backend relies on this environment variable to set executor memory.
// TODO: Set this only in the Mesos scheduler.
executorEnvs("SPARK_EXECUTOR_MEMORY") = executorMemory + "m"
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index ecd9d70680..8b4885d3bb 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -90,7 +90,16 @@ object SparkBuild extends Build {
lazy val assemblyProj = Project("assembly", file("assembly"), settings = assemblyProjSettings)
.dependsOn(core, graphx, bagel, mllib, streaming, repl, sql) dependsOn(maybeYarn: _*) dependsOn(maybeHive: _*) dependsOn(maybeGanglia: _*)
- lazy val assembleDeps = TaskKey[Unit]("assemble-deps", "Build assembly of dependencies and packages Spark projects")
+ lazy val assembleDepsTask = TaskKey[Unit]("assemble-deps")
+ lazy val assembleDeps = assembleDepsTask := {
+ println()
+ println("**** NOTE ****")
+ println("'sbt/sbt assemble-deps' is no longer supported.")
+ println("Instead create a normal assembly and:")
+ println(" export SPARK_PREPEND_CLASSES=1 (toggle on)")
+ println(" unset SPARK_PREPEND_CLASSES (toggle off)")
+ println()
+ }
// A configuration to set an alternative publishLocalConfiguration
lazy val MavenCompile = config("m2r") extend(Compile)
@@ -373,6 +382,7 @@ object SparkBuild extends Build {
"net.sf.py4j" % "py4j" % "0.8.1"
),
libraryDependencies ++= maybeAvro,
+ assembleDeps,
previousArtifact := sparkPreviousArtifact("spark-core")
)
@@ -584,9 +594,7 @@ object SparkBuild extends Build {
def assemblyProjSettings = sharedSettings ++ Seq(
name := "spark-assembly",
- assembleDeps in Compile <<= (packageProjects.map(packageBin in Compile in _) ++ Seq(packageDependency in Compile)).dependOn,
- jarName in assembly <<= version map { v => "spark-assembly-" + v + "-hadoop" + hadoopVersion + ".jar" },
- jarName in packageDependency <<= version map { v => "spark-assembly-" + v + "-hadoop" + hadoopVersion + "-deps.jar" }
+ jarName in assembly <<= version map { v => "spark-assembly-" + v + "-hadoop" + hadoopVersion + ".jar" }
) ++ assemblySettings ++ extraAssemblySettings
def extraAssemblySettings() = Seq(
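
On the build side, assemble-deps survives only as a stub: the task key is kept so existing invocations do not fail, but running it prints the migration notice instead of building a -deps jar, and the jarName in packageDependency setting that produced spark-assembly-*-deps.jar is dropped. What the stub prints (reproduced from the println calls above, without sbt's own log prefixes):

  $ sbt/sbt assemble-deps

  **** NOTE ****
  'sbt/sbt assemble-deps' is no longer supported.
  Instead create a normal assembly and:
    export SPARK_PREPEND_CLASSES=1 (toggle on)
    unset SPARK_PREPEND_CLASSES (toggle off)
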