path: root/pom.xml
author    Marcelo Vanzin <vanzin@cloudera.com>  2015-01-08 17:15:13 -0800
committer Patrick Wendell <pwendell@gmail.com>  2015-01-08 17:15:13 -0800
commit    48cecf673c38ead56afa2dea49d295165c67cdf4 (patch)
tree      67d1a63293ecbac130ea608d08a46e5489922b3b /pom.xml
parent    c9c8b219ad81c4c30bc1598ff35b01f964570c29 (diff)
[SPARK-4048] Enhance and extend hadoop-provided profile.
This change does a few things to make the hadoop-provided profile more useful:

- Create new profiles for other libraries / services that might be provided by the infrastructure
- Simplify and fix the poms so that the profiles are only activated while building assemblies.
- Fix tests so that they're able to run when the profiles are activated
- Add a new env variable to be used by distributions that use these profiles to provide the runtime classpath for Spark jobs and daemons.

Author: Marcelo Vanzin <vanzin@cloudera.com>

Closes #2982 from vanzin/SPARK-4048 and squashes the following commits:

82eb688 [Marcelo Vanzin] Add a comment.
eb228c0 [Marcelo Vanzin] Fix borked merge.
4e38f4e [Marcelo Vanzin] Merge branch 'master' into SPARK-4048
9ef79a3 [Marcelo Vanzin] Alternative way to propagate test classpath to child processes.
371ebee [Marcelo Vanzin] Review feedback.
52f366d [Marcelo Vanzin] Merge branch 'master' into SPARK-4048
83099fc [Marcelo Vanzin] Merge branch 'master' into SPARK-4048
7377e7b [Marcelo Vanzin] Merge branch 'master' into SPARK-4048
322f882 [Marcelo Vanzin] Fix merge fail.
f24e9e7 [Marcelo Vanzin] Merge branch 'master' into SPARK-4048
8b00b6a [Marcelo Vanzin] Merge branch 'master' into SPARK-4048
9640503 [Marcelo Vanzin] Cleanup child process log message.
115fde5 [Marcelo Vanzin] Simplify a comment (and make it consistent with another pom).
e3ab2da [Marcelo Vanzin] Fix hive-thriftserver profile.
7820d58 [Marcelo Vanzin] Fix CliSuite with provided profiles.
1be73d4 [Marcelo Vanzin] Restore flume-provided profile.
d1399ed [Marcelo Vanzin] Restore jetty dependency.
82a54b9 [Marcelo Vanzin] Remove unused profile.
5c54a25 [Marcelo Vanzin] Fix HiveThriftServer2Suite with *-provided profiles.
1fc4d0b [Marcelo Vanzin] Update dependencies for hive-thriftserver.
f7b3bbe [Marcelo Vanzin] Add snappy to hadoop-provided list.
9e4e001 [Marcelo Vanzin] Remove duplicate hive profile.
d928d62 [Marcelo Vanzin] Redirect child stderr to parent's log.
4d67469 [Marcelo Vanzin] Propagate SPARK_DIST_CLASSPATH on Yarn.
417d90e [Marcelo Vanzin] Introduce "SPARK_DIST_CLASSPATH".
2f95f0d [Marcelo Vanzin] Propagate classpath to child processes during testing.
1adf91c [Marcelo Vanzin] Re-enable maven-install-plugin for a few projects.
284dda6 [Marcelo Vanzin] Rework the "hadoop-provided" profile, add new ones.
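The profiles themselves live in the poms that build assemblies, which are outside this pom.xml-only view of the change. As a rough sketch (an assumption, since those poms are not shown here), each *-provided profile presumably does nothing more than flip the matching scope property that this commit introduces below:

    <!-- Hypothetical assembly-pom profile: the parent pom defaults
         hadoop.deps.scope to "compile", and this override turns every
         dependency tagged with ${hadoop.deps.scope} into "provided". -->
    <profile>
      <id>hadoop-provided</id>
      <properties>
        <hadoop.deps.scope>provided</hadoop.deps.scope>
      </properties>
    </profile>

A distribution built this way would then point the new SPARK_DIST_CLASSPATH variable at the jars its infrastructure provides, for example the output of "hadoop classpath".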
Diffstat (limited to 'pom.xml')
-rw-r--r--  pom.xml | 281
1 file changed, 209 insertions, 72 deletions
diff --git a/pom.xml b/pom.xml
index 46ff211f91..703e5c47bf 100644
--- a/pom.xml
+++ b/pom.xml
@@ -123,8 +123,10 @@
<protobuf.version>2.4.1</protobuf.version>
<yarn.version>${hadoop.version}</yarn.version>
<hbase.version>0.94.6</hbase.version>
+ <hbase.artifact>hbase</hbase.artifact>
<flume.version>1.4.0</flume.version>
<zookeeper.version>3.4.5</zookeeper.version>
+ <hive.group>org.spark-project.hive</hive.group>
<!-- Version used in Maven Hive dependency -->
<hive.version>0.13.1a</hive.version>
<!-- Version used for internal directory structure -->
@@ -143,13 +145,36 @@
<commons.httpclient.version>4.2.6</commons.httpclient.version>
<commons.math3.version>3.1.1</commons.math3.version>
<test_classpath_file>${project.build.directory}/spark-test-classpath.txt</test_classpath_file>
- <PermGen>64m</PermGen>
- <MaxPermGen>512m</MaxPermGen>
<scala.version>2.10.4</scala.version>
<scala.binary.version>2.10</scala.binary.version>
<jline.version>${scala.version}</jline.version>
<jline.groupid>org.scala-lang</jline.groupid>
- <jackson.version>1.8.8</jackson.version>
+ <codehaus.jackson.version>1.8.8</codehaus.jackson.version>
+ <snappy.version>1.1.1.6</snappy.version>
+
+ <!--
+ Dependency scopes that can be overridden by enabling certain profiles. These profiles are
+ declared in the projects that build assemblies.
+
+ For other projects the scope should remain as "compile", otherwise they are not available
+ during compilation if the dependency is transitive (e.g. "bagel/" depending on "core/" and
+ needing Hadoop classes in the classpath to compile).
+ -->
+ <flume.deps.scope>compile</flume.deps.scope>
+ <hadoop.deps.scope>compile</hadoop.deps.scope>
+ <hbase.deps.scope>compile</hbase.deps.scope>
+ <hive.deps.scope>compile</hive.deps.scope>
+ <parquet.deps.scope>compile</parquet.deps.scope>
+
+ <!--
+ Overridable test home. So that you can call individual pom files directly without
+ things breaking.
+ -->
+ <spark.test.home>${session.executionRootDirectory}</spark.test.home>
+
+ <PermGen>64m</PermGen>
+ <MaxPermGen>512m</MaxPermGen>
+ <CodeCacheSize>512m</CodeCacheSize>
</properties>
<repositories>
@@ -244,21 +269,20 @@
</snapshots>
</pluginRepository>
</pluginRepositories>
-
<dependencies>
- <!--
- This is a dummy dependency that is used along with the shading plug-in
- to create effective poms on publishing (see SPARK-3812).
- -->
+ <!--
+ This is a dummy dependency that is used along with the shading plug-in
+ to create effective poms on publishing (see SPARK-3812).
+ -->
<dependency>
<groupId>org.spark-project.spark</groupId>
<artifactId>unused</artifactId>
<version>1.0.0</version>
</dependency>
<!--
- This dependency has been added to provided scope as it is needed for executing build
- specific groovy scripts using gmaven+ and not required for downstream project building
- with spark.
+ This dependency has been added to provided scope as it is needed for executing build
+ specific groovy scripts using gmaven+ and not required for downstream project building
+ with spark.
-->
<dependency>
<groupId>org.codehaus.groovy</groupId>
@@ -369,11 +393,13 @@
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>${slf4j.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>${slf4j.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
@@ -390,6 +416,7 @@
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>${log4j.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
</dependency>
<dependency>
<groupId>com.ning</groupId>
@@ -399,7 +426,8 @@
<dependency>
<groupId>org.xerial.snappy</groupId>
<artifactId>snappy-java</artifactId>
- <version>1.1.1.6</version>
+ <version>${snappy.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
</dependency>
<dependency>
<groupId>net.jpountz.lz4</groupId>
@@ -427,6 +455,7 @@
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
<version>${protobuf.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
</dependency>
<dependency>
<groupId>${akka.group}</groupId>
@@ -449,6 +478,17 @@
<version>${akka.version}</version>
</dependency>
<dependency>
+ <groupId>${akka.group}</groupId>
+ <artifactId>akka-zeromq_${scala.binary.version}</artifactId>
+ <version>${akka.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>${akka.group}</groupId>
+ <artifactId>akka-actor_${scala.binary.version}</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
<groupId>org.apache.mesos</groupId>
<artifactId>mesos</artifactId>
<version>${mesos.version}</version>
@@ -577,6 +617,7 @@
<groupId>org.apache.curator</groupId>
<artifactId>curator-recipes</artifactId>
<version>2.4.0</version>
+ <scope>${hadoop.deps.scope}</scope>
<exclusions>
<exclusion>
<groupId>org.jboss.netty</groupId>
@@ -588,6 +629,7 @@
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
<exclusions>
<exclusion>
<groupId>asm</groupId>
@@ -623,11 +665,13 @@
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
<version>${avro.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
</dependency>
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro-ipc</artifactId>
<version>${avro.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
<exclusions>
<exclusion>
<groupId>io.netty</groupId>
@@ -656,6 +700,7 @@
<artifactId>avro-mapred</artifactId>
<version>${avro.version}</version>
<classifier>${avro.mapred.classifier}</classifier>
+ <scope>${hive.deps.scope}</scope>
<exclusions>
<exclusion>
<groupId>io.netty</groupId>
@@ -684,6 +729,7 @@
<groupId>net.java.dev.jets3t</groupId>
<artifactId>jets3t</artifactId>
<version>${jets3t.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
<exclusions>
<exclusion>
<groupId>commons-logging</groupId>
@@ -695,6 +741,7 @@
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-api</artifactId>
<version>${yarn.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
<exclusions>
<exclusion>
<groupId>javax.servlet</groupId>
@@ -722,6 +769,7 @@
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-common</artifactId>
<version>${yarn.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
<exclusions>
<exclusion>
<groupId>asm</groupId>
@@ -778,6 +826,7 @@
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-server-web-proxy</artifactId>
<version>${yarn.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
<exclusions>
<exclusion>
<groupId>asm</groupId>
@@ -805,6 +854,7 @@
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-client</artifactId>
<version>${yarn.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
<exclusions>
<exclusion>
<groupId>asm</groupId>
@@ -829,15 +879,126 @@
</exclusions>
</dependency>
<dependency>
- <!-- Matches the versions of jackson-mapper-asl and jackson-core-asl with avro -->
- <groupId>org.codehaus.jackson</groupId>
- <artifactId>jackson-mapper-asl</artifactId>
- <version>${jackson.version}</version>
+ <groupId>org.apache.zookeeper</groupId>
+ <artifactId>zookeeper</artifactId>
+ <version>${zookeeper.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
</dependency>
<dependency>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-core-asl</artifactId>
- <version>${jackson.version}</version>
+ <version>${codehaus.jackson.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-mapper-asl</artifactId>
+ <version>${codehaus.jackson.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
+ </dependency>
+ <dependency>
+ <groupId>${hive.group}</groupId>
+ <artifactId>hive-beeline</artifactId>
+ <version>${hive.version}</version>
+ <scope>${hive.deps.scope}</scope>
+ </dependency>
+ <dependency>
+ <groupId>${hive.group}</groupId>
+ <artifactId>hive-cli</artifactId>
+ <version>${hive.version}</version>
+ <scope>${hive.deps.scope}</scope>
+ </dependency>
+ <dependency>
+ <groupId>${hive.group}</groupId>
+ <artifactId>hive-exec</artifactId>
+ <version>${hive.version}</version>
+ <scope>${hive.deps.scope}</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>commons-logging</groupId>
+ <artifactId>commons-logging</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.esotericsoftware.kryo</groupId>
+ <artifactId>kryo</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>${hive.group}</groupId>
+ <artifactId>hive-jdbc</artifactId>
+ <version>${hive.version}</version>
+ <scope>${hive.deps.scope}</scope>
+ </dependency>
+ <dependency>
+ <groupId>${hive.group}</groupId>
+ <artifactId>hive-metastore</artifactId>
+ <version>${hive.version}</version>
+ <scope>${hive.deps.scope}</scope>
+ </dependency>
+ <dependency>
+ <groupId>${hive.group}</groupId>
+ <artifactId>hive-serde</artifactId>
+ <version>${hive.version}</version>
+ <scope>${hive.deps.scope}</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>commons-logging</groupId>
+ <artifactId>commons-logging</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>commons-logging</groupId>
+ <artifactId>commons-logging-api</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>com.twitter</groupId>
+ <artifactId>parquet-column</artifactId>
+ <version>${parquet.version}</version>
+ <scope>${parquet.deps.scope}</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.twitter</groupId>
+ <artifactId>parquet-hadoop</artifactId>
+ <version>${parquet.version}</version>
+ <scope>${parquet.deps.scope}</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.flume</groupId>
+ <artifactId>flume-ng-core</artifactId>
+ <version>${flume.version}</version>
+ <scope>${flume.deps.scope}</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>io.netty</groupId>
+ <artifactId>netty</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.thrift</groupId>
+ <artifactId>libthrift</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>servlet-api</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.flume</groupId>
+ <artifactId>flume-ng-sdk</artifactId>
+ <version>${flume.version}</version>
+ <scope>${flume.deps.scope}</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>io.netty</groupId>
+ <artifactId>netty</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.thrift</groupId>
+ <artifactId>libthrift</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
</dependencies>
</dependencyManagement>
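Because the hive, parquet, and flume artifacts above now sit in <dependencyManagement> with property-driven scopes, child modules inherit both the version and the scope. A consuming module's pom would, under this scheme, declare only the coordinates, along these lines (a sketch of a hypothetical consumer, not part of this diff):

    <!-- Version and scope are inherited from the parent's
         <dependencyManagement>, so this dependency flips to "provided"
         automatically when an assembly profile overrides ${hive.deps.scope}. -->
    <dependency>
      <groupId>${hive.group}</groupId>
      <artifactId>hive-exec</artifactId>
    </dependency>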
@@ -914,6 +1075,7 @@
<jvmArg>-Xmx1024m</jvmArg>
<jvmArg>-XX:PermSize=${PermGen}</jvmArg>
<jvmArg>-XX:MaxPermSize=${MaxPermGen}</jvmArg>
+ <jvmArg>-XX:ReservedCodeCacheSize=${CodeCacheSize}</jvmArg>
</jvmArgs>
<javacArgs>
<javacArg>-source</javacArg>
@@ -980,15 +1142,21 @@
<reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory>
<junitxml>.</junitxml>
<filereports>SparkTestSuite.txt</filereports>
- <argLine>-ea -Xmx3g -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=512m</argLine>
+ <argLine>-ea -Xmx3g -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=${CodeCacheSize}</argLine>
<stderr/>
+ <environmentVariables>
+ <!--
+ Setting SPARK_DIST_CLASSPATH is a simple way to make sure any child processes
+ launched by the tests have access to the correct test-time classpath.
+ -->
+ <SPARK_DIST_CLASSPATH>${test_classpath}</SPARK_DIST_CLASSPATH>
+ </environmentVariables>
<systemProperties>
<java.awt.headless>true</java.awt.headless>
- <spark.test.home>${session.executionRootDirectory}</spark.test.home>
+ <spark.test.home>${spark.test.home}</spark.test.home>
<spark.testing>1</spark.testing>
<spark.ui.enabled>false</spark.ui.enabled>
<spark.ui.showConsoleProgress>false</spark.ui.showConsoleProgress>
- <spark.executor.extraClassPath>${test_classpath}</spark.executor.extraClassPath>
<spark.driver.allowMultipleContexts>true</spark.driver.allowMultipleContexts>
</systemProperties>
</configuration>
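The ${test_classpath} property exported here is not defined in any of the visible hunks; presumably it is generated earlier in the build, for instance by the maven-dependency-plugin's build-classpath goal, wired roughly like this (an assumed sketch, not part of this diff):

    <!-- Assumed wiring: capture the module's test classpath in the
         ${test_classpath} property before tests run, so it can be exported
         as SPARK_DIST_CLASSPATH to forked child processes. -->
    <plugin>
      <groupId>org.apache.maven.plugins</groupId>
      <artifactId>maven-dependency-plugin</artifactId>
      <executions>
        <execution>
          <id>generate-test-classpath</id>
          <phase>test-compile</phase>
          <goals>
            <goal>build-classpath</goal>
          </goals>
          <configuration>
            <includeScope>test</includeScope>
            <outputProperty>test_classpath</outputProperty>
          </configuration>
        </execution>
      </executions>
    </plugin>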
@@ -1013,11 +1181,6 @@
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-shade-plugin</artifactId>
- <version>2.2</version>
- </plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<version>2.2.1</version>
<configuration>
@@ -1104,6 +1267,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
+ <version>2.2</version>
<configuration>
<shadedArtifactAttached>false</shadedArtifactAttached>
<artifactSet>
@@ -1373,53 +1537,6 @@
</dependencies>
</profile>
- <!-- Build without Hadoop dependencies that are included in some runtime environments. -->
- <profile>
- <id>hadoop-provided</id>
- <dependencies>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-client</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-yarn-api</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-yarn-common</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-yarn-server-web-proxy</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-yarn-client</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.avro</groupId>
- <artifactId>avro</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.avro</groupId>
- <artifactId>avro-ipc</artifactId>
- <scope>provided</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.zookeeper</groupId>
- <artifactId>zookeeper</artifactId>
- <version>${zookeeper.version}</version>
- <scope>provided</scope>
- </dependency>
- </dependencies>
- </profile>
<profile>
<id>hive-thriftserver</id>
<modules>
@@ -1472,5 +1589,25 @@
</properties>
</profile>
+ <!--
+ These empty profiles are available in some sub-modules. Declare them here so that
+ maven does not complain when they're provided on the command line for a sub-module
+ that does not have them.
+ -->
+ <profile>
+ <id>flume-provided</id>
+ </profile>
+ <profile>
+ <id>hadoop-provided</id>
+ </profile>
+ <profile>
+ <id>hbase-provided</id>
+ </profile>
+ <profile>
+ <id>hive-provided</id>
+ </profile>
+ <profile>
+ <id>parquet-provided</id>
+ </profile>
</profiles>
</project>
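The empty profiles declared at the end are inert on purpose: as the comment in that hunk says, they only make the -P*-provided flags acceptable when building an individual sub-module. The working counterparts live in the poms that build assemblies and presumably pair the same profile id with a real override, for example:

    <!-- Stub in this parent pom: no effect, just makes the flag valid. -->
    <profile>
      <id>hive-provided</id>
    </profile>

    <!-- Assumed counterpart in an assembly-building pom: the same id,
         plus the override that flips the managed Hive dependencies to
         "provided". -->
    <profile>
      <id>hive-provided</id>
      <properties>
        <hive.deps.scope>provided</hive.deps.scope>
      </properties>
    </profile>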