 assembly/pom.xml                                                                                           |  20
 bagel/pom.xml                                                                                              |   4
 bin/compute-classpath.cmd                                                                                  |   7
 bin/compute-classpath.sh                                                                                   |   7
 core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala                   |  21
 core/src/main/scala/org/apache/spark/util/Utils.scala                                                      |   5
 core/src/test/scala/org/apache/spark/DriverSuite.scala                                                     |   2
 examples/pom.xml                                                                                           | 361
 external/flume-sink/pom.xml                                                                                |  22
 external/flume/pom.xml                                                                                     |  15
 external/zeromq/pom.xml                                                                                    |   1
 graphx/pom.xml                                                                                             |   4
 mllib/pom.xml                                                                                              |   6
 pom.xml                                                                                                    | 281
 repl/pom.xml                                                                                               |  11
 sql/core/pom.xml                                                                                           |   2
 sql/hive-thriftserver/pom.xml                                                                              |   9
 sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala                 |   3
 sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suite.scala   |   9
 sql/hive/pom.xml                                                                                           |  31
 streaming/pom.xml                                                                                          |   8
 yarn/pom.xml                                                                                               |   7
 yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala                                              |   7
 23 files changed, 490 insertions(+), 353 deletions(-)
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 78fb908f9a..b2a9d0780e 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -354,5 +354,25 @@
</dependency>
</dependencies>
</profile>
+
+ <!-- Profiles that disable inclusion of certain dependencies. -->
+ <profile>
+ <id>hadoop-provided</id>
+ <properties>
+ <hadoop.deps.scope>provided</hadoop.deps.scope>
+ </properties>
+ </profile>
+ <profile>
+ <id>hive-provided</id>
+ <properties>
+ <hive.deps.scope>provided</hive.deps.scope>
+ </properties>
+ </profile>
+ <profile>
+ <id>parquet-provided</id>
+ <properties>
+ <parquet.deps.scope>provided</parquet.deps.scope>
+ </properties>
+ </profile>
</profiles>
</project>
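The three `*-provided` profiles above only flip the corresponding `*.deps.scope` properties declared in the root pom, so the affected artifacts stay on the compile classpath but are left out of the assembly jar. A packager building against an existing Hadoop/Hive/Parquet installation would typically enable them together, e.g. `mvn -Phadoop-provided -Phive-provided -Pparquet-provided -DskipTests package` (an illustrative invocation, not part of this change).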
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 3bcd38fa32..510e92640e 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -41,10 +41,6 @@
<version>${project.version}</version>
</dependency>
<dependency>
- <groupId>org.eclipse.jetty</groupId>
- <artifactId>jetty-server</artifactId>
- </dependency>
- <dependency>
<groupId>org.scalacheck</groupId>
<artifactId>scalacheck_${scala.binary.version}</artifactId>
<scope>test</scope>
diff --git a/bin/compute-classpath.cmd b/bin/compute-classpath.cmd
index a4c099fb45..088f993954 100644
--- a/bin/compute-classpath.cmd
+++ b/bin/compute-classpath.cmd
@@ -109,6 +109,13 @@ if "x%YARN_CONF_DIR%"=="x" goto no_yarn_conf_dir
set CLASSPATH=%CLASSPATH%;%YARN_CONF_DIR%
:no_yarn_conf_dir
+rem To allow for distributions to append needed libraries to the classpath (e.g. when
+rem using the "hadoop-provided" profile to build Spark), check SPARK_DIST_CLASSPATH and
+rem append it to the final classpath.
+if not "x%$SPARK_DIST_CLASSPATH%"=="x" (
+ set CLASSPATH=%CLASSPATH%;%SPARK_DIST_CLASSPATH%
+)
+
rem A bit of a hack to allow calling this script within run2.cmd without seeing output
if "%DONT_PRINT_CLASSPATH%"=="1" goto exit
diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh
index a31ea73d3c..8f3b396ffd 100755
--- a/bin/compute-classpath.sh
+++ b/bin/compute-classpath.sh
@@ -146,4 +146,11 @@ if [ -n "$YARN_CONF_DIR" ]; then
CLASSPATH="$CLASSPATH:$YARN_CONF_DIR"
fi
+# To allow for distributions to append needed libraries to the classpath (e.g. when
+# using the "hadoop-provided" profile to build Spark), check SPARK_DIST_CLASSPATH and
+# append it to the final classpath.
+if [ -n "$SPARK_DIST_CLASSPATH" ]; then
+ CLASSPATH="$CLASSPATH:$SPARK_DIST_CLASSPATH"
+fi
+
echo "$CLASSPATH"
diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala
index 8c7de75600..7eb87a564d 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala
@@ -55,19 +55,26 @@ private[spark] class SparkDeploySchedulerBackend(
"{{WORKER_URL}}")
val extraJavaOpts = sc.conf.getOption("spark.executor.extraJavaOptions")
.map(Utils.splitCommandString).getOrElse(Seq.empty)
- val classPathEntries = sc.conf.getOption("spark.executor.extraClassPath").toSeq.flatMap { cp =>
- cp.split(java.io.File.pathSeparator)
- }
- val libraryPathEntries =
- sc.conf.getOption("spark.executor.extraLibraryPath").toSeq.flatMap { cp =>
- cp.split(java.io.File.pathSeparator)
+ val classPathEntries = sc.conf.getOption("spark.executor.extraClassPath")
+ .map(_.split(java.io.File.pathSeparator).toSeq).getOrElse(Nil)
+ val libraryPathEntries = sc.conf.getOption("spark.executor.extraLibraryPath")
+ .map(_.split(java.io.File.pathSeparator).toSeq).getOrElse(Nil)
+
+ // When testing, expose the parent class path to the child. This is processed by
+ // compute-classpath.{cmd,sh} and makes all needed jars available to child processes
+ // when the assembly is built with the "*-provided" profiles enabled.
+ val testingClassPath =
+ if (sys.props.contains("spark.testing")) {
+ sys.props("java.class.path").split(java.io.File.pathSeparator).toSeq
+ } else {
+ Nil
}
// Start executors with a few necessary configs for registering with the scheduler
val sparkJavaOpts = Utils.sparkJavaOpts(conf, SparkConf.isExecutorStartupConf)
val javaOpts = sparkJavaOpts ++ extraJavaOpts
val command = Command("org.apache.spark.executor.CoarseGrainedExecutorBackend",
- args, sc.executorEnvs, classPathEntries, libraryPathEntries, javaOpts)
+ args, sc.executorEnvs, classPathEntries ++ testingClassPath, libraryPathEntries, javaOpts)
val appUIAddress = sc.ui.map(_.appUIAddress).getOrElse("")
val appDesc = new ApplicationDescription(sc.appName, maxCores, sc.executorMemory, command,
appUIAddress, sc.eventLogDir)
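The hunk above uses two small patterns that are restated below as a standalone sketch (the helper names are illustrative, not part of the commit): an optional, path-separator-delimited config value becomes a Seq, and the parent JVM's classpath is forwarded to child processes only when the `spark.testing` system property is set.

    import java.io.File

    // Split an optional "a.jar:b.jar"-style value into its entries, or Nil when unset.
    def splitPaths(value: Option[String]): Seq[String] =
      value.map(_.split(File.pathSeparator).toSeq).getOrElse(Nil)

    // Forward the parent classpath to child processes only while running tests.
    def testingClassPath: Seq[String] =
      if (sys.props.contains("spark.testing")) splitPaths(sys.props.get("java.class.path"))
      else Nil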
diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index 9d6b6161ce..c4f1898a2d 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -990,11 +990,12 @@ private[spark] object Utils extends Logging {
for ((key, value) <- extraEnvironment) {
environment.put(key, value)
}
+
val process = builder.start()
new Thread("read stderr for " + command(0)) {
override def run() {
for (line <- Source.fromInputStream(process.getErrorStream).getLines()) {
- System.err.println(line)
+ logInfo(line)
}
}
}.start()
@@ -1089,7 +1090,7 @@ private[spark] object Utils extends Logging {
var firstUserLine = 0
var insideSpark = true
var callStack = new ArrayBuffer[String]() :+ "<unknown>"
-
+
Thread.currentThread.getStackTrace().foreach { ste: StackTraceElement =>
// When running under some profilers, the current stack trace might contain some bogus
// frames. This is intended to ensure that we don't crash in these situations by
diff --git a/core/src/test/scala/org/apache/spark/DriverSuite.scala b/core/src/test/scala/org/apache/spark/DriverSuite.scala
index 541d8eac80..8a54360e81 100644
--- a/core/src/test/scala/org/apache/spark/DriverSuite.scala
+++ b/core/src/test/scala/org/apache/spark/DriverSuite.scala
@@ -35,7 +35,7 @@ class DriverSuite extends FunSuite with Timeouts {
forAll(masters) { (master: String) =>
failAfter(60 seconds) {
Utils.executeAndGetOutput(
- Seq("./bin/spark-class", "org.apache.spark.DriverWithoutCleanup", master),
+ Seq(s"$sparkHome/bin/spark-class", "org.apache.spark.DriverWithoutCleanup", master),
new File(sparkHome),
Map("SPARK_TESTING" -> "1", "SPARK_HOME" -> sparkHome))
}
diff --git a/examples/pom.xml b/examples/pom.xml
index bdc5d0562f..002d4458c4 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -98,143 +98,145 @@
<version>${project.version}</version>
</dependency>
<dependency>
- <groupId>org.eclipse.jetty</groupId>
- <artifactId>jetty-server</artifactId>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-testing-util</artifactId>
+ <version>${hbase.version}</version>
+ <scope>${hbase.deps.scope}</scope>
+ <exclusions>
+ <exclusion>
+ <!-- SPARK-4455 -->
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-annotations</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.jruby</groupId>
+ <artifactId>jruby-complete</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-protocol</artifactId>
+ <version>${hbase.version}</version>
+ <scope>${hbase.deps.scope}</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-common</artifactId>
+ <version>${hbase.version}</version>
+ <scope>${hbase.deps.scope}</scope>
+ <exclusions>
+ <exclusion>
+ <!-- SPARK-4455 -->
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-annotations</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-client</artifactId>
+ <version>${hbase.version}</version>
+ <scope>${hbase.deps.scope}</scope>
+ <exclusions>
+ <exclusion>
+ <!-- SPARK-4455 -->
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-annotations</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>io.netty</groupId>
+ <artifactId>netty</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-server</artifactId>
+ <version>${hbase.version}</version>
+ <scope>${hbase.deps.scope}</scope>
+ <exclusions>
+ <exclusion>
+ <!-- SPARK-4455 -->
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-annotations</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-core</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-client</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-core</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-auth</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-annotations</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-hdfs</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop1-compat</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-math</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-core</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-server</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-core</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-json</artifactId>
+ </exclusion>
+ <exclusion>
+ <!-- hbase uses v2.4, which is better, but ...-->
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop-compat</artifactId>
+ <version>${hbase.version}</version>
+ <scope>${hbase.deps.scope}</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop-compat</artifactId>
+ <version>${hbase.version}</version>
+ <type>test-jar</type>
+ <scope>test</scope>
</dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-testing-util</artifactId>
- <version>${hbase.version}</version>
- <exclusions>
- <exclusion>
- <!-- SPARK-4455 -->
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-annotations</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.jruby</groupId>
- <artifactId>jruby-complete</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-protocol</artifactId>
- <version>${hbase.version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-common</artifactId>
- <version>${hbase.version}</version>
- <exclusions>
- <exclusion>
- <!-- SPARK-4455 -->
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-annotations</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-client</artifactId>
- <version>${hbase.version}</version>
- <exclusions>
- <exclusion>
- <!-- SPARK-4455 -->
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-annotations</artifactId>
- </exclusion>
- <exclusion>
- <groupId>io.netty</groupId>
- <artifactId>netty</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-server</artifactId>
- <version>${hbase.version}</version>
- <exclusions>
- <exclusion>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-core</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-client</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-mapreduce-client-core</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-auth</artifactId>
- </exclusion>
- <exclusion>
- <!-- SPARK-4455 -->
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-annotations</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-annotations</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-hdfs</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-hadoop1-compat</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.apache.commons</groupId>
- <artifactId>commons-math</artifactId>
- </exclusion>
- <exclusion>
- <groupId>com.sun.jersey</groupId>
- <artifactId>jersey-core</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-api</artifactId>
- </exclusion>
- <exclusion>
- <groupId>com.sun.jersey</groupId>
- <artifactId>jersey-server</artifactId>
- </exclusion>
- <exclusion>
- <groupId>com.sun.jersey</groupId>
- <artifactId>jersey-core</artifactId>
- </exclusion>
- <exclusion>
- <groupId>com.sun.jersey</groupId>
- <artifactId>jersey-json</artifactId>
- </exclusion>
- <exclusion>
- <!-- hbase uses v2.4, which is better, but ...-->
- <groupId>commons-io</groupId>
- <artifactId>commons-io</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-hadoop-compat</artifactId>
- <version>${hbase.version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-hadoop-compat</artifactId>
- <version>${hbase.version}</version>
- <type>test-jar</type>
- <scope>test</scope>
- </dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-math3</artifactId>
@@ -308,31 +310,6 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
- <configuration>
- <shadedArtifactAttached>false</shadedArtifactAttached>
- <outputFile>${project.build.directory}/scala-${scala.binary.version}/spark-examples-${project.version}-hadoop${hadoop.version}.jar</outputFile>
- <artifactSet>
- <includes>
- <include>*:*</include>
- </includes>
- </artifactSet>
- <filters>
- <filter>
- <artifact>com.google.guava:guava</artifact>
- <excludes>
- <exclude>com/google/common/base/Optional*</exclude>
- </excludes>
- </filter>
- <filter>
- <artifact>*:*</artifact>
- <excludes>
- <exclude>META-INF/*.SF</exclude>
- <exclude>META-INF/*.DSA</exclude>
- <exclude>META-INF/*.RSA</exclude>
- </excludes>
- </filter>
- </filters>
- </configuration>
<executions>
<execution>
<phase>package</phase>
@@ -340,6 +317,34 @@
<goal>shade</goal>
</goals>
<configuration>
+ <shadedArtifactAttached>false</shadedArtifactAttached>
+ <outputFile>${project.build.directory}/scala-${scala.binary.version}/spark-examples-${project.version}-hadoop${hadoop.version}.jar</outputFile>
+ <artifactSet>
+ <includes>
+ <include>*:*</include>
+ </includes>
+ </artifactSet>
+ <filters>
+ <filter>
+ <artifact>com.google.guava:guava</artifact>
+ <excludes>
+ <!--
+ Exclude all Guava classes so they're picked up from the main assembly. The
+ dependency still needs to be compile-scoped so that the relocation below
+ works.
+ -->
+ <exclude>**</exclude>
+ </excludes>
+ </filter>
+ <filter>
+ <artifact>*:*</artifact>
+ <excludes>
+ <exclude>META-INF/*.SF</exclude>
+ <exclude>META-INF/*.DSA</exclude>
+ <exclude>META-INF/*.RSA</exclude>
+ </excludes>
+ </filter>
+ </filters>
<relocations>
<relocation>
<pattern>com.google</pattern>
@@ -411,7 +416,7 @@
</properties>
</profile>
<profile>
- <!-- We add a source directory specific to Scala 2.10 since Kafka
+ <!-- We add a source directory specific to Scala 2.10 since Kafka
only works with it -->
<id>scala-2.10</id>
<activation>
@@ -449,5 +454,37 @@
</plugins>
</build>
</profile>
+
+ <!-- Profiles that disable inclusion of certain dependencies. -->
+ <profile>
+ <id>flume-provided</id>
+ <properties>
+ <flume.deps.scope>provided</flume.deps.scope>
+ </properties>
+ </profile>
+ <profile>
+ <id>hadoop-provided</id>
+ <properties>
+ <hadoop.deps.scope>provided</hadoop.deps.scope>
+ </properties>
+ </profile>
+ <profile>
+ <id>hbase-provided</id>
+ <properties>
+ <hbase.deps.scope>provided</hbase.deps.scope>
+ </properties>
+ </profile>
+ <profile>
+ <id>hive-provided</id>
+ <properties>
+ <hive.deps.scope>provided</hive.deps.scope>
+ </properties>
+ </profile>
+ <profile>
+ <id>parquet-provided</id>
+ <properties>
+ <parquet.deps.scope>provided</parquet.deps.scope>
+ </properties>
+ </profile>
</profiles>
</project>
diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml
index 71f595d0a6..0706f1ebf6 100644
--- a/external/flume-sink/pom.xml
+++ b/external/flume-sink/pom.xml
@@ -38,32 +38,10 @@
<dependency>
<groupId>org.apache.flume</groupId>
<artifactId>flume-ng-sdk</artifactId>
- <version>${flume.version}</version>
- <exclusions>
- <exclusion>
- <groupId>io.netty</groupId>
- <artifactId>netty</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.apache.thrift</groupId>
- <artifactId>libthrift</artifactId>
- </exclusion>
- </exclusions>
</dependency>
<dependency>
<groupId>org.apache.flume</groupId>
<artifactId>flume-ng-core</artifactId>
- <version>${flume.version}</version>
- <exclusions>
- <exclusion>
- <groupId>io.netty</groupId>
- <artifactId>netty</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.apache.thrift</groupId>
- <artifactId>libthrift</artifactId>
- </exclusion>
- </exclusions>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index 0374262212..1f2681394c 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -48,18 +48,11 @@
</dependency>
<dependency>
<groupId>org.apache.flume</groupId>
+ <artifactId>flume-ng-core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.flume</groupId>
<artifactId>flume-ng-sdk</artifactId>
- <version>${flume.version}</version>
- <exclusions>
- <exclusion>
- <groupId>io.netty</groupId>
- <artifactId>netty</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.apache.thrift</groupId>
- <artifactId>libthrift</artifactId>
- </exclusion>
- </exclusions>
</dependency>
<dependency>
<groupId>org.scalacheck</groupId>
diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
index 2fb5f0ed2f..e919c2c9b1 100644
--- a/external/zeromq/pom.xml
+++ b/external/zeromq/pom.xml
@@ -44,7 +44,6 @@
<dependency>
<groupId>${akka.group}</groupId>
<artifactId>akka-zeromq_${scala.binary.version}</artifactId>
- <version>${akka.version}</version>
</dependency>
<dependency>
<groupId>org.scalacheck</groupId>
diff --git a/graphx/pom.xml b/graphx/pom.xml
index 91db799d24..72374aae6d 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -46,10 +46,6 @@
<version>${jblas.version}</version>
</dependency>
<dependency>
- <groupId>org.eclipse.jetty</groupId>
- <artifactId>jetty-server</artifactId>
- </dependency>
- <dependency>
<groupId>org.scalacheck</groupId>
<artifactId>scalacheck_${scala.binary.version}</artifactId>
<scope>test</scope>
diff --git a/mllib/pom.xml b/mllib/pom.xml
index 2198757481..a0bda89cca 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -29,7 +29,7 @@
<artifactId>spark-mllib_2.10</artifactId>
<properties>
<sbt.project.name>mllib</sbt.project.name>
- </properties>
+ </properties>
<packaging>jar</packaging>
<name>Spark Project ML Library</name>
<url>http://spark.apache.org/</url>
@@ -51,10 +51,6 @@
<version>${project.version}</version>
</dependency>
<dependency>
- <groupId>org.eclipse.jetty</groupId>
- <artifactId>jetty-server</artifactId>
- </dependency>
- <dependency>
<groupId>org.jblas</groupId>
<artifactId>jblas</artifactId>
<version>${jblas.version}</version>
diff --git a/pom.xml b/pom.xml
index 46ff211f91..703e5c47bf 100644
--- a/pom.xml
+++ b/pom.xml
@@ -123,8 +123,10 @@
<protobuf.version>2.4.1</protobuf.version>
<yarn.version>${hadoop.version}</yarn.version>
<hbase.version>0.94.6</hbase.version>
+ <hbase.artifact>hbase</hbase.artifact>
<flume.version>1.4.0</flume.version>
<zookeeper.version>3.4.5</zookeeper.version>
+ <hive.group>org.spark-project.hive</hive.group>
<!-- Version used in Maven Hive dependency -->
<hive.version>0.13.1a</hive.version>
<!-- Version used for internal directory structure -->
@@ -143,13 +145,36 @@
<commons.httpclient.version>4.2.6</commons.httpclient.version>
<commons.math3.version>3.1.1</commons.math3.version>
<test_classpath_file>${project.build.directory}/spark-test-classpath.txt</test_classpath_file>
- <PermGen>64m</PermGen>
- <MaxPermGen>512m</MaxPermGen>
<scala.version>2.10.4</scala.version>
<scala.binary.version>2.10</scala.binary.version>
<jline.version>${scala.version}</jline.version>
<jline.groupid>org.scala-lang</jline.groupid>
- <jackson.version>1.8.8</jackson.version>
+ <codehaus.jackson.version>1.8.8</codehaus.jackson.version>
+ <snappy.version>1.1.1.6</snappy.version>
+
+ <!--
+ Dependency scopes that can be overridden by enabling certain profiles. These profiles are
+ declared in the projects that build assemblies.
+
+ For other projects the scope should remain as "compile", otherwise they are not available
+    during compilation if the dependency is transitive (e.g. "bagel/" depending on "core/" and
+ needing Hadoop classes in the classpath to compile).
+ -->
+ <flume.deps.scope>compile</flume.deps.scope>
+ <hadoop.deps.scope>compile</hadoop.deps.scope>
+ <hbase.deps.scope>compile</hbase.deps.scope>
+ <hive.deps.scope>compile</hive.deps.scope>
+ <parquet.deps.scope>compile</parquet.deps.scope>
+
+ <!--
+    Overridable test home. So that you can call individual pom files directly without
+ things breaking.
+ -->
+ <spark.test.home>${session.executionRootDirectory}</spark.test.home>
+
+ <PermGen>64m</PermGen>
+ <MaxPermGen>512m</MaxPermGen>
+ <CodeCacheSize>512m</CodeCacheSize>
</properties>
<repositories>
@@ -244,21 +269,20 @@
</snapshots>
</pluginRepository>
</pluginRepositories>
-
<dependencies>
- <!--
- This is a dummy dependency that is used along with the shading plug-in
- to create effective poms on publishing (see SPARK-3812).
- -->
+ <!--
+ This is a dummy dependency that is used along with the shading plug-in
+ to create effective poms on publishing (see SPARK-3812).
+ -->
<dependency>
<groupId>org.spark-project.spark</groupId>
<artifactId>unused</artifactId>
<version>1.0.0</version>
</dependency>
<!--
- This dependency has been added to provided scope as it is needed for executing build
- specific groovy scripts using gmaven+ and not required for downstream project building
- with spark.
+      This dependency has been added to provided scope as it is needed for executing build
+ specific groovy scripts using gmaven+ and not required for downstream project building
+ with spark.
-->
<dependency>
<groupId>org.codehaus.groovy</groupId>
@@ -369,11 +393,13 @@
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>${slf4j.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>${slf4j.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
@@ -390,6 +416,7 @@
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>${log4j.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
</dependency>
<dependency>
<groupId>com.ning</groupId>
@@ -399,7 +426,8 @@
<dependency>
<groupId>org.xerial.snappy</groupId>
<artifactId>snappy-java</artifactId>
- <version>1.1.1.6</version>
+ <version>${snappy.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
</dependency>
<dependency>
<groupId>net.jpountz.lz4</groupId>
@@ -427,6 +455,7 @@
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
<version>${protobuf.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
</dependency>
<dependency>
<groupId>${akka.group}</groupId>
@@ -449,6 +478,17 @@
<version>${akka.version}</version>
</dependency>
<dependency>
+ <groupId>${akka.group}</groupId>
+ <artifactId>akka-zeromq_${scala.binary.version}</artifactId>
+ <version>${akka.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>${akka.group}</groupId>
+ <artifactId>akka-actor_${scala.binary.version}</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
<groupId>org.apache.mesos</groupId>
<artifactId>mesos</artifactId>
<version>${mesos.version}</version>
@@ -577,6 +617,7 @@
<groupId>org.apache.curator</groupId>
<artifactId>curator-recipes</artifactId>
<version>2.4.0</version>
+ <scope>${hadoop.deps.scope}</scope>
<exclusions>
<exclusion>
<groupId>org.jboss.netty</groupId>
@@ -588,6 +629,7 @@
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
<exclusions>
<exclusion>
<groupId>asm</groupId>
@@ -623,11 +665,13 @@
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
<version>${avro.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
</dependency>
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro-ipc</artifactId>
<version>${avro.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
<exclusions>
<exclusion>
<groupId>io.netty</groupId>
@@ -656,6 +700,7 @@
<artifactId>avro-mapred</artifactId>
<version>${avro.version}</version>
<classifier>${avro.mapred.classifier}</classifier>
+ <scope>${hive.deps.scope}</scope>
<exclusions>
<exclusion>
<groupId>io.netty</groupId>
@@ -684,6 +729,7 @@
<groupId>net.java.dev.jets3t</groupId>
<artifactId>jets3t</artifactId>
<version>${jets3t.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
<exclusions>
<exclusion>
<groupId>commons-logging</groupId>
@@ -695,6 +741,7 @@
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-api</artifactId>
<version>${yarn.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
<exclusions>
<exclusion>
<groupId>javax.servlet</groupId>
@@ -722,6 +769,7 @@
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-common</artifactId>
<version>${yarn.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
<exclusions>
<exclusion>
<groupId>asm</groupId>
@@ -778,6 +826,7 @@
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-server-web-proxy</artifactId>
<version>${yarn.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
<exclusions>
<exclusion>
<groupId>asm</groupId>
@@ -805,6 +854,7 @@
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-client</artifactId>
<version>${yarn.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
<exclusions>
<exclusion>
<groupId>asm</groupId>
@@ -829,15 +879,126 @@
</exclusions>
</dependency>
<dependency>
- <!-- Matches the versions of jackson-mapper-asl and jackson-core-asl with avro -->
- <groupId>org.codehaus.jackson</groupId>
- <artifactId>jackson-mapper-asl</artifactId>
- <version>${jackson.version}</version>
+ <groupId>org.apache.zookeeper</groupId>
+ <artifactId>zookeeper</artifactId>
+ <version>${zookeeper.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
</dependency>
<dependency>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-core-asl</artifactId>
- <version>${jackson.version}</version>
+ <version>${codehaus.jackson.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-mapper-asl</artifactId>
+ <version>${codehaus.jackson.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
+ </dependency>
+ <dependency>
+ <groupId>${hive.group}</groupId>
+ <artifactId>hive-beeline</artifactId>
+ <version>${hive.version}</version>
+ <scope>${hive.deps.scope}</scope>
+ </dependency>
+ <dependency>
+ <groupId>${hive.group}</groupId>
+ <artifactId>hive-cli</artifactId>
+ <version>${hive.version}</version>
+ <scope>${hive.deps.scope}</scope>
+ </dependency>
+ <dependency>
+ <groupId>${hive.group}</groupId>
+ <artifactId>hive-exec</artifactId>
+ <version>${hive.version}</version>
+ <scope>${hive.deps.scope}</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>commons-logging</groupId>
+ <artifactId>commons-logging</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.esotericsoftware.kryo</groupId>
+ <artifactId>kryo</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>${hive.group}</groupId>
+ <artifactId>hive-jdbc</artifactId>
+ <version>${hive.version}</version>
+ <scope>${hive.deps.scope}</scope>
+ </dependency>
+ <dependency>
+ <groupId>${hive.group}</groupId>
+ <artifactId>hive-metastore</artifactId>
+ <version>${hive.version}</version>
+ <scope>${hive.deps.scope}</scope>
+ </dependency>
+ <dependency>
+ <groupId>${hive.group}</groupId>
+ <artifactId>hive-serde</artifactId>
+ <version>${hive.version}</version>
+ <scope>${hive.deps.scope}</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>commons-logging</groupId>
+ <artifactId>commons-logging</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>commons-logging</groupId>
+ <artifactId>commons-logging-api</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>com.twitter</groupId>
+ <artifactId>parquet-column</artifactId>
+ <version>${parquet.version}</version>
+ <scope>${parquet.deps.scope}</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.twitter</groupId>
+ <artifactId>parquet-hadoop</artifactId>
+ <version>${parquet.version}</version>
+ <scope>${parquet.deps.scope}</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.flume</groupId>
+ <artifactId>flume-ng-core</artifactId>
+ <version>${flume.version}</version>
+ <scope>${flume.deps.scope}</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>io.netty</groupId>
+ <artifactId>netty</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.thrift</groupId>
+ <artifactId>libthrift</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>servlet-api</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.flume</groupId>
+ <artifactId>flume-ng-sdk</artifactId>
+ <version>${flume.version}</version>
+ <scope>${flume.deps.scope}</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>io.netty</groupId>
+ <artifactId>netty</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.thrift</groupId>
+ <artifactId>libthrift</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
</dependencies>
</dependencyManagement>
@@ -914,6 +1075,7 @@
<jvmArg>-Xmx1024m</jvmArg>
<jvmArg>-XX:PermSize=${PermGen}</jvmArg>
<jvmArg>-XX:MaxPermSize=${MaxPermGen}</jvmArg>
+ <jvmArg>-XX:ReservedCodeCacheSize=${CodeCacheSize}</jvmArg>
</jvmArgs>
<javacArgs>
<javacArg>-source</javacArg>
@@ -980,15 +1142,21 @@
<reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
<junitxml>.</junitxml>
<filereports>SparkTestSuite.txt</filereports>
- <argLine>-ea -Xmx3g -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=512m</argLine>
+ <argLine>-ea -Xmx3g -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=${CodeCacheSize}</argLine>
<stderr/>
+ <environmentVariables>
+ <!--
+ Setting SPARK_DIST_CLASSPATH is a simple way to make sure any child processes
+ launched by the tests have access to the correct test-time classpath.
+ -->
+ <SPARK_DIST_CLASSPATH>${test_classpath}</SPARK_DIST_CLASSPATH>
+ </environmentVariables>
<systemProperties>
<java.awt.headless>true</java.awt.headless>
- <spark.test.home>${session.executionRootDirectory}</spark.test.home>
+ <spark.test.home>${spark.test.home}</spark.test.home>
<spark.testing>1</spark.testing>
<spark.ui.enabled>false</spark.ui.enabled>
<spark.ui.showConsoleProgress>false</spark.ui.showConsoleProgress>
- <spark.executor.extraClassPath>${test_classpath}</spark.executor.extraClassPath>
<spark.driver.allowMultipleContexts>true</spark.driver.allowMultipleContexts>
</systemProperties>
</configuration>
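Exporting SPARK_DIST_CLASSPATH into the test environment means that any process the suites fork through bin/compute-classpath.{sh,cmd} (spark-class, spark-submit, and so on) sees the full test-time classpath even when the assembly was built with the `*-provided` profiles, which is what allows the `spark.executor.extraClassPath` workaround above to be dropped.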
@@ -1013,11 +1181,6 @@
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-shade-plugin</artifactId>
- <version>2.2</version>
- </plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<version>2.2.1</version>
<configuration>
@@ -1104,6 +1267,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
+ <version>2.2</version>
<configuration>
<shadedArtifactAttached>false</shadedArtifactAttached>
<artifactSet>
@@ -1373,53 +1537,6 @@
</dependencies>
</profile>
- <!-- Build without Hadoop dependencies that are included in some runtime environments. -->
- <profile>
- <id>hadoop-provided</id>
- <dependencies>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-client</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-yarn-api</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-yarn-common</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-yarn-server-web-proxy</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-yarn-client</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.avro</groupId>
- <artifactId>avro</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.avro</groupId>
- <artifactId>avro-ipc</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.zookeeper</groupId>
- <artifactId>zookeeper</artifactId>
- <version>${zookeeper.version}</version>
- <scope>provided</scope>
- </dependency>
- </dependencies>
- </profile>
<profile>
<id>hive-thriftserver</id>
<modules>
@@ -1472,5 +1589,25 @@
</properties>
</profile>
+ <!--
+ These empty profiles are available in some sub-modules. Declare them here so that
+ maven does not complain when they're provided on the command line for a sub-module
+ that does not have them.
+ -->
+ <profile>
+ <id>flume-provided</id>
+ </profile>
+ <profile>
+ <id>hadoop-provided</id>
+ </profile>
+ <profile>
+ <id>hbase-provided</id>
+ </profile>
+ <profile>
+ <id>hive-provided</id>
+ </profile>
+ <profile>
+ <id>parquet-provided</id>
+ </profile>
</profiles>
</project>
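Because the Hive, Flume, Parquet, ZooKeeper and codehaus Jackson artifacts are now pinned here in `<dependencyManagement>` (version, scope and exclusions in one place), the module poms below can drop their per-module `<version>` and `<exclusions>` blocks and simply reference the managed coordinates, e.g. via `${hive.group}`.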
diff --git a/repl/pom.xml b/repl/pom.xml
index 97165e0249..0bc8bccf90 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -69,10 +69,6 @@
<scope>test</scope>
</dependency>
<dependency>
- <groupId>org.eclipse.jetty</groupId>
- <artifactId>jetty-server</artifactId>
- </dependency>
- <dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-compiler</artifactId>
<version>${scala.version}</version>
@@ -103,13 +99,6 @@
<skip>true</skip>
</configuration>
</plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-install-plugin</artifactId>
- <configuration>
- <skip>true</skip>
- </configuration>
- </plugin>
<!-- Include a source dir depending on the Scala version -->
<plugin>
<groupId>org.codehaus.mojo</groupId>
diff --git a/sql/core/pom.xml b/sql/core/pom.xml
index 023ce2041b..3e9ef07df9 100644
--- a/sql/core/pom.xml
+++ b/sql/core/pom.xml
@@ -56,12 +56,10 @@
<dependency>
<groupId>com.twitter</groupId>
<artifactId>parquet-column</artifactId>
- <version>${parquet.version}</version>
</dependency>
<dependency>
<groupId>com.twitter</groupId>
<artifactId>parquet-hadoop</artifactId>
- <version>${parquet.version}</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml
index d3a517375c..259eef0b80 100644
--- a/sql/hive-thriftserver/pom.xml
+++ b/sql/hive-thriftserver/pom.xml
@@ -42,19 +42,16 @@
<version>${project.version}</version>
</dependency>
<dependency>
- <groupId>org.spark-project.hive</groupId>
+ <groupId>${hive.group}</groupId>
<artifactId>hive-cli</artifactId>
- <version>${hive.version}</version>
</dependency>
<dependency>
- <groupId>org.spark-project.hive</groupId>
+ <groupId>${hive.group}</groupId>
<artifactId>hive-jdbc</artifactId>
- <version>${hive.version}</version>
</dependency>
<dependency>
- <groupId>org.spark-project.hive</groupId>
+ <groupId>${hive.group}</groupId>
<artifactId>hive-beeline</artifactId>
- <version>${hive.version}</version>
</dependency>
</dependencies>
<build>
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
index e8ffbc5b95..60953576d0 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
@@ -48,6 +48,7 @@ class CliSuite extends FunSuite with BeforeAndAfterAll with Logging {
| --master local
| --hiveconf ${ConfVars.METASTORECONNECTURLKEY}=$jdbcUrl
| --hiveconf ${ConfVars.METASTOREWAREHOUSE}=$warehousePath
+ | --driver-class-path ${sys.props("java.class.path")}
""".stripMargin.split("\\s+").toSeq ++ extraArgs
}
@@ -70,7 +71,7 @@ class CliSuite extends FunSuite with BeforeAndAfterAll with Logging {
}
// Searching expected output line from both stdout and stderr of the CLI process
- val process = (Process(command) #< queryStream).run(
+ val process = (Process(command, None) #< queryStream).run(
ProcessLogger(captureOutput("stdout"), captureOutput("stderr")))
try {
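Forwarding the test JVM's own `java.class.path` through `--driver-class-path` is what keeps the forked CLI process able to see Hive and its transitive dependencies when they are only provided-scope on the module. A rough sketch of the same idea outside the suite (class and jar names are hypothetical placeholders):

    // Build a spark-submit invocation that inherits this JVM's classpath.
    val driverClassPath = sys.props("java.class.path")
    val command = Seq(
      "./bin/spark-submit",
      "--master", "local",
      "--driver-class-path", driverClassPath,   // provided-scope jars stay visible
      "--class", "org.example.Main",            // hypothetical main class
      "target/example.jar")                     // hypothetical application jar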
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suite.scala
index 94d5ed4f1d..7814aa38f4 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suite.scala
@@ -142,6 +142,7 @@ class HiveThriftServer2Suite extends FunSuite with Logging {
| --hiveconf ${ConfVars.HIVE_SERVER2_THRIFT_BIND_HOST}=localhost
| --hiveconf ${ConfVars.HIVE_SERVER2_TRANSPORT_MODE}=http
| --hiveconf ${ConfVars.HIVE_SERVER2_THRIFT_HTTP_PORT}=$port
+ | --driver-class-path ${sys.props("java.class.path")}
""".stripMargin.split("\\s+").toSeq
} else {
s"""$startScript
@@ -151,6 +152,7 @@ class HiveThriftServer2Suite extends FunSuite with Logging {
| --hiveconf ${ConfVars.METASTOREWAREHOUSE}=$warehousePath
| --hiveconf ${ConfVars.HIVE_SERVER2_THRIFT_BIND_HOST}=localhost
| --hiveconf ${ConfVars.HIVE_SERVER2_THRIFT_PORT}=$port
+ | --driver-class-path ${sys.props("java.class.path")}
""".stripMargin.split("\\s+").toSeq
}
@@ -179,8 +181,9 @@ class HiveThriftServer2Suite extends FunSuite with Logging {
}
}
- // Resets SPARK_TESTING to avoid loading Log4J configurations in testing class paths
- val env = Seq("SPARK_TESTING" -> "0")
+ val env = Seq(
+ // Resets SPARK_TESTING to avoid loading Log4J configurations in testing class paths
+ "SPARK_TESTING" -> "0")
Process(command, None, env: _*).run(ProcessLogger(
captureThriftServerOutput("stdout"),
@@ -214,7 +217,7 @@ class HiveThriftServer2Suite extends FunSuite with Logging {
} finally {
warehousePath.delete()
metastorePath.delete()
- Process(stopScript).run().exitValue()
+ Process(stopScript, None, env: _*).run().exitValue()
// The `spark-daemon.sh' script uses kill, which is not synchronous, have to wait for a while.
Thread.sleep(3.seconds.toMillis)
Option(logTailingProcess).map(_.destroy())
diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml
index 46aacad011..58b0722464 100644
--- a/sql/hive/pom.xml
+++ b/sql/hive/pom.xml
@@ -47,9 +47,8 @@
<version>${project.version}</version>
</dependency>
<dependency>
- <groupId>org.spark-project.hive</groupId>
+ <groupId>${hive.group}</groupId>
<artifactId>hive-metastore</artifactId>
- <version>${hive.version}</version>
</dependency>
<dependency>
<groupId>commons-httpclient</groupId>
@@ -57,51 +56,27 @@
<version>3.1</version>
</dependency>
<dependency>
- <groupId>org.spark-project.hive</groupId>
+ <groupId>${hive.group}</groupId>
<artifactId>hive-exec</artifactId>
- <version>${hive.version}</version>
- <exclusions>
- <exclusion>
- <groupId>commons-logging</groupId>
- <artifactId>commons-logging</artifactId>
- </exclusion>
- <exclusion>
- <groupId>com.esotericsoftware.kryo</groupId>
- <artifactId>kryo</artifactId>
- </exclusion>
- </exclusions>
</dependency>
<dependency>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-mapper-asl</artifactId>
</dependency>
<dependency>
- <groupId>org.spark-project.hive</groupId>
+ <groupId>${hive.group}</groupId>
<artifactId>hive-serde</artifactId>
- <version>${hive.version}</version>
- <exclusions>
- <exclusion>
- <groupId>commons-logging</groupId>
- <artifactId>commons-logging</artifactId>
- </exclusion>
- <exclusion>
- <groupId>commons-logging</groupId>
- <artifactId>commons-logging-api</artifactId>
- </exclusion>
- </exclusions>
</dependency>
<!-- hive-serde already depends on avro, but this brings in customized config of avro deps from parent -->
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
- <version>${avro.version}</version>
</dependency>
<!-- use the build matching the hadoop api of avro-mapred (i.e. no classifier for hadoop 1 API,
hadoop2 classifier for hadoop 2 API. avro-mapred is a dependency of org.spark-project.hive:hive-serde -->
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro-mapred</artifactId>
- <version>${avro.version}</version>
<classifier>${avro.mapred.classifier}</classifier>
</dependency>
<dependency>
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 2023210d9b..d3c6d0347a 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -68,13 +68,13 @@
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
<testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
<plugins>
- <!--
- This plugin forces the generation of jar containing streaming test classes,
+ <!--
+ This plugin forces the generation of jar containing streaming test classes,
so that the tests classes of external modules can use them. The two execution profiles
are necessary - first one for 'mvn package', second one for 'mvn test-compile'. Ideally,
- 'mvn compile' should not compile test classes and therefore should not need this.
+ 'mvn compile' should not compile test classes and therefore should not need this.
However, an open Maven bug (http://jira.codehaus.org/browse/MNG-3559)
- causes the compilation to fail if streaming test-jar is not generated. Hence, the
+ causes the compilation to fail if streaming test-jar is not generated. Hence, the
second execution profile for 'mvn test-compile'.
-->
<plugin>
diff --git a/yarn/pom.xml b/yarn/pom.xml
index bcb77b3e3c..b86857db7b 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -131,13 +131,6 @@
<skip>true</skip>
</configuration>
</plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-install-plugin</artifactId>
- <configuration>
- <skip>true</skip>
- </configuration>
- </plugin>
</plugins>
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index 8d05437713..c363d755c1 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -367,6 +367,10 @@ private[spark] class Client(
}
}
+ sys.env.get(ENV_DIST_CLASSPATH).foreach { dcp =>
+ env(ENV_DIST_CLASSPATH) = dcp
+ }
+
env
}
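Copying SPARK_DIST_CLASSPATH from the submitter's environment into the container launch environment lets processes started on the YARN cluster extend their classpath the same way the local compute-classpath scripts do, so a `*-provided` build keeps working once the application leaves the client machine.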
@@ -652,6 +656,9 @@ object Client extends Logging {
val APP_FILE_PERMISSION: FsPermission =
FsPermission.createImmutable(Integer.parseInt("644", 8).toShort)
+ // Distribution-defined classpath to add to processes
+ val ENV_DIST_CLASSPATH = "SPARK_DIST_CLASSPATH"
+
/**
* Find the user-defined Spark jar if configured, or return the jar containing this
* class if not.