Diffstat (limited to 'bin/compute-classpath.sh')
-rwxr-xr-x  bin/compute-classpath.sh  35
1 file changed, 19 insertions(+), 16 deletions(-)
diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh
index bef42df71c..be37102dc0 100755
--- a/bin/compute-classpath.sh
+++ b/bin/compute-classpath.sh
@@ -30,21 +30,7 @@ FWDIR="$(cd `dirname $0`/..; pwd)"
# Build up classpath
CLASSPATH="$SPARK_CLASSPATH:$FWDIR/conf"
-# Support for interacting with Hive. Since hive pulls in a lot of dependencies that might break
-# existing Spark applications, it is not included in the standard spark assembly. Instead, we only
-# include it in the classpath if the user has explicitly requested it by running "sbt hive/assembly"
-# Hopefully we will find a way to avoid uber-jars entirely and deploy only the needed packages in
-# the future.
-if [ -f "$FWDIR"/sql/hive/target/scala-$SCALA_VERSION/spark-hive-assembly-*.jar ]; then
-
- # Datanucleus jars do not work if only included in the uberjar as plugin.xml metadata is lost.
- DATANUCLEUSJARS=$(JARS=("$FWDIR/lib_managed/jars"/datanucleus-*.jar); IFS=:; echo "${JARS[*]}")
- CLASSPATH=$CLASSPATH:$DATANUCLEUSJARS
-
- ASSEMBLY_DIR="$FWDIR/sql/hive/target/scala-$SCALA_VERSION/"
-else
- ASSEMBLY_DIR="$FWDIR/assembly/target/scala-$SCALA_VERSION/"
-fi
+ASSEMBLY_DIR="$FWDIR/assembly/target/scala-$SCALA_VERSION"
# First check if we have a dependencies jar. If so, include binary classes with the deps jar
if [ -f "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar ]; then
@@ -59,7 +45,7 @@ if [ -f "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar ]; then
CLASSPATH="$CLASSPATH:$FWDIR/sql/core/target/scala-$SCALA_VERSION/classes"
CLASSPATH="$CLASSPATH:$FWDIR/sql/hive/target/scala-$SCALA_VERSION/classes"
- DEPS_ASSEMBLY_JAR=`ls "$ASSEMBLY_DIR"/spark*-assembly*hadoop*-deps.jar`
+ DEPS_ASSEMBLY_JAR=`ls "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar`
CLASSPATH="$CLASSPATH:$DEPS_ASSEMBLY_JAR"
else
# Else use spark-assembly jar from either RELEASE or assembly directory
@@ -71,6 +57,23 @@ else
CLASSPATH="$CLASSPATH:$ASSEMBLY_JAR"
fi
+# When Hive support is needed, Datanucleus jars must be included on the classpath.
+# Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost.
+# Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is
+# built with Hive, so first check if the datanucleus jars exist, and then ensure the current Spark
+# assembly is built for Hive, before actually populating the CLASSPATH with the jars.
+# Note that this check order is faster (by up to half a second) in the case where Hive is not used.
+num_datanucleus_jars=$(ls "$FWDIR"/lib_managed/jars/ | grep "datanucleus-.*\\.jar" | wc -l)
+if [ $num_datanucleus_jars -gt 0 ]; then
+ AN_ASSEMBLY_JAR=${ASSEMBLY_JAR:-$DEPS_ASSEMBLY_JAR}
+ num_hive_files=$(jar tvf "$AN_ASSEMBLY_JAR" org/apache/hadoop/hive/ql/exec 2>/dev/null | wc -l)
+ if [ $num_hive_files -gt 0 ]; then
+ echo "Spark assembly has been built with Hive, including Datanucleus jars on classpath" 1>&2
+ DATANUCLEUSJARS=$(echo "$FWDIR/lib_managed/jars"/datanucleus-*.jar | tr " " :)
+ CLASSPATH=$CLASSPATH:$DATANUCLEUSJARS
+ fi
+fi
+
# Add test classes if we're running from SBT or Maven with SPARK_TESTING set to 1
if [[ $SPARK_TESTING == 1 ]]; then
CLASSPATH="$CLASSPATH:$FWDIR/core/target/scala-$SCALA_VERSION/test-classes"
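A minimal standalone sketch of the check ordering described in the added comment block above: test the cheap filesystem condition (do any datanucleus jars exist?) before paying for a scan of the large assembly jar. JARS_DIR and ASSEMBLY_JAR are hypothetical stand-ins, not the script's actual variables, and compgen/grep are swapped in here for the script's `ls | grep | wc` pipeline and jar's path-filter argument.

#!/usr/bin/env bash
# Sketch only; paths below are assumptions for illustration.
JARS_DIR="./lib_managed/jars"          # where sbt/maven place the datanucleus jars
ASSEMBLY_JAR="./spark-assembly.jar"    # whichever assembly jar was resolved earlier

# Cheap test first: compgen -G succeeds only if the glob matches at least one file.
if compgen -G "$JARS_DIR/datanucleus-*.jar" > /dev/null; then
  # Only now scan the assembly jar's entries for Hive classes.
  if jar tf "$ASSEMBLY_JAR" 2>/dev/null | grep -q '^org/apache/hadoop/hive/ql/exec'; then
    # Join the matching jars with ':' to form a classpath fragment.
    DATANUCLEUSJARS=$(echo "$JARS_DIR"/datanucleus-*.jar | tr ' ' :)
    CLASSPATH="$CLASSPATH:$DATANUCLEUSJARS"
  fi
fi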