diff options
author | Marcelo Vanzin <vanzin@cloudera.com> | 2015-01-08 17:15:13 -0800 |
---|---|---|
committer | Patrick Wendell <pwendell@gmail.com> | 2015-01-08 17:15:13 -0800 |
commit | 48cecf673c38ead56afa2dea49d295165c67cdf4 (patch) | |
tree | 67d1a63293ecbac130ea608d08a46e5489922b3b /pom.xml | |
parent | c9c8b219ad81c4c30bc1598ff35b01f964570c29 (diff) | |
download | spark-48cecf673c38ead56afa2dea49d295165c67cdf4.tar.gz spark-48cecf673c38ead56afa2dea49d295165c67cdf4.tar.bz2 spark-48cecf673c38ead56afa2dea49d295165c67cdf4.zip |
[SPARK-4048] Enhance and extend hadoop-provided profile.
This change does a few things to make the hadoop-provided profile more useful:
- Create new profiles for other libraries / services that might be provided by the infrastructure
- Simplify and fix the poms so that the profiles are only activated while building assemblies.
- Fix tests so that they're able to run when the profiles are activated
- Add a new env variable to be used by distributions that use these profiles to provide the runtime
classpath for Spark jobs and daemons.
Author: Marcelo Vanzin <vanzin@cloudera.com>
Closes #2982 from vanzin/SPARK-4048 and squashes the following commits:
82eb688 [Marcelo Vanzin] Add a comment.
eb228c0 [Marcelo Vanzin] Fix borked merge.
4e38f4e [Marcelo Vanzin] Merge branch 'master' into SPARK-4048
9ef79a3 [Marcelo Vanzin] Alternative way to propagate test classpath to child processes.
371ebee [Marcelo Vanzin] Review feedback.
52f366d [Marcelo Vanzin] Merge branch 'master' into SPARK-4048
83099fc [Marcelo Vanzin] Merge branch 'master' into SPARK-4048
7377e7b [Marcelo Vanzin] Merge branch 'master' into SPARK-4048
322f882 [Marcelo Vanzin] Fix merge fail.
f24e9e7 [Marcelo Vanzin] Merge branch 'master' into SPARK-4048
8b00b6a [Marcelo Vanzin] Merge branch 'master' into SPARK-4048
9640503 [Marcelo Vanzin] Cleanup child process log message.
115fde5 [Marcelo Vanzin] Simplify a comment (and make it consistent with another pom).
e3ab2da [Marcelo Vanzin] Fix hive-thriftserver profile.
7820d58 [Marcelo Vanzin] Fix CliSuite with provided profiles.
1be73d4 [Marcelo Vanzin] Restore flume-provided profile.
d1399ed [Marcelo Vanzin] Restore jetty dependency.
82a54b9 [Marcelo Vanzin] Remove unused profile.
5c54a25 [Marcelo Vanzin] Fix HiveThriftServer2Suite with *-provided profiles.
1fc4d0b [Marcelo Vanzin] Update dependencies for hive-thriftserver.
f7b3bbe [Marcelo Vanzin] Add snappy to hadoop-provided list.
9e4e001 [Marcelo Vanzin] Remove duplicate hive profile.
d928d62 [Marcelo Vanzin] Redirect child stderr to parent's log.
4d67469 [Marcelo Vanzin] Propagate SPARK_DIST_CLASSPATH on Yarn.
417d90e [Marcelo Vanzin] Introduce "SPARK_DIST_CLASSPATH".
2f95f0d [Marcelo Vanzin] Propagate classpath to child processes during testing.
1adf91c [Marcelo Vanzin] Re-enable maven-install-plugin for a few projects.
284dda6 [Marcelo Vanzin] Rework the "hadoop-provided" profile, add new ones.
Diffstat (limited to 'pom.xml')
-rw-r--r-- | pom.xml | 281 |
1 files changed, 209 insertions, 72 deletions
@@ -123,8 +123,10 @@ <protobuf.version>2.4.1</protobuf.version> <yarn.version>${hadoop.version}</yarn.version> <hbase.version>0.94.6</hbase.version> + <hbase.artifact>hbase</hbase.artifact> <flume.version>1.4.0</flume.version> <zookeeper.version>3.4.5</zookeeper.version> + <hive.group>org.spark-project.hive</hive.group> <!-- Version used in Maven Hive dependency --> <hive.version>0.13.1a</hive.version> <!-- Version used for internal directory structure --> @@ -143,13 +145,36 @@ <commons.httpclient.version>4.2.6</commons.httpclient.version> <commons.math3.version>3.1.1</commons.math3.version> <test_classpath_file>${project.build.directory}/spark-test-classpath.txt</test_classpath_file> - <PermGen>64m</PermGen> - <MaxPermGen>512m</MaxPermGen> <scala.version>2.10.4</scala.version> <scala.binary.version>2.10</scala.binary.version> <jline.version>${scala.version}</jline.version> <jline.groupid>org.scala-lang</jline.groupid> - <jackson.version>1.8.8</jackson.version> + <codehaus.jackson.version>1.8.8</codehaus.jackson.version> + <snappy.version>1.1.1.6</snappy.version> + + <!-- + Dependency scopes that can be overridden by enabling certain profiles. These profiles are + declared in the projects that build assemblies. + + For other projects the scope should remain as "compile", otherwise they are not available + during compilation if the dependency is transitive (e.g. "bagel/" depending on "core/" and + needing Hadoop classes in the classpath to compile). + --> + <flume.deps.scope>compile</flume.deps.scope> + <hadoop.deps.scope>compile</hadoop.deps.scope> + <hbase.deps.scope>compile</hbase.deps.scope> + <hive.deps.scope>compile</hive.deps.scope> + <parquet.deps.scope>compile</parquet.deps.scope> + + <!-- + Overridable test home. So that you can call individual pom files directly without + things breaking. 
+ --> + <spark.test.home>${session.executionRootDirectory}</spark.test.home> + + <PermGen>64m</PermGen> + <MaxPermGen>512m</MaxPermGen> + <CodeCacheSize>512m</CodeCacheSize> </properties> <repositories> @@ -244,21 +269,20 @@ </snapshots> </pluginRepository> </pluginRepositories> - <dependencies> - <!-- - This is a dummy dependency that is used along with the shading plug-in - to create effective poms on publishing (see SPARK-3812). - --> + <!-- + This is a dummy dependency that is used along with the shading plug-in + to create effective poms on publishing (see SPARK-3812). + --> <dependency> <groupId>org.spark-project.spark</groupId> <artifactId>unused</artifactId> <version>1.0.0</version> </dependency> <!-- - This dependency has been added to provided scope as it is needed for executing build - specific groovy scripts using gmaven+ and not required for downstream project building - with spark. + This dependency has been added to provided scope as it is needed for executing build + specific groovy scripts using gmaven+ and not required for downstream project building + with spark. 
--> <dependency> <groupId>org.codehaus.groovy</groupId> @@ -369,11 +393,13 @@ <groupId>org.slf4j</groupId> <artifactId>slf4j-api</artifactId> <version>${slf4j.version}</version> + <scope>${hadoop.deps.scope}</scope> </dependency> <dependency> <groupId>org.slf4j</groupId> <artifactId>slf4j-log4j12</artifactId> <version>${slf4j.version}</version> + <scope>${hadoop.deps.scope}</scope> </dependency> <dependency> <groupId>org.slf4j</groupId> @@ -390,6 +416,7 @@ <groupId>log4j</groupId> <artifactId>log4j</artifactId> <version>${log4j.version}</version> + <scope>${hadoop.deps.scope}</scope> </dependency> <dependency> <groupId>com.ning</groupId> @@ -399,7 +426,8 @@ <dependency> <groupId>org.xerial.snappy</groupId> <artifactId>snappy-java</artifactId> - <version>1.1.1.6</version> + <version>${snappy.version}</version> + <scope>${hadoop.deps.scope}</scope> </dependency> <dependency> <groupId>net.jpountz.lz4</groupId> @@ -427,6 +455,7 @@ <groupId>com.google.protobuf</groupId> <artifactId>protobuf-java</artifactId> <version>${protobuf.version}</version> + <scope>${hadoop.deps.scope}</scope> </dependency> <dependency> <groupId>${akka.group}</groupId> @@ -449,6 +478,17 @@ <version>${akka.version}</version> </dependency> <dependency> + <groupId>${akka.group}</groupId> + <artifactId>akka-zeromq_${scala.binary.version}</artifactId> + <version>${akka.version}</version> + <exclusions> + <exclusion> + <groupId>${akka.group}</groupId> + <artifactId>akka-actor_${scala.binary.version}</artifactId> + </exclusion> + </exclusions> + </dependency> + <dependency> <groupId>org.apache.mesos</groupId> <artifactId>mesos</artifactId> <version>${mesos.version}</version> @@ -577,6 +617,7 @@ <groupId>org.apache.curator</groupId> <artifactId>curator-recipes</artifactId> <version>2.4.0</version> + <scope>${hadoop.deps.scope}</scope> <exclusions> <exclusion> <groupId>org.jboss.netty</groupId> @@ -588,6 +629,7 @@ <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-client</artifactId> 
<version>${hadoop.version}</version> + <scope>${hadoop.deps.scope}</scope> <exclusions> <exclusion> <groupId>asm</groupId> @@ -623,11 +665,13 @@ <groupId>org.apache.avro</groupId> <artifactId>avro</artifactId> <version>${avro.version}</version> + <scope>${hadoop.deps.scope}</scope> </dependency> <dependency> <groupId>org.apache.avro</groupId> <artifactId>avro-ipc</artifactId> <version>${avro.version}</version> + <scope>${hadoop.deps.scope}</scope> <exclusions> <exclusion> <groupId>io.netty</groupId> @@ -656,6 +700,7 @@ <artifactId>avro-mapred</artifactId> <version>${avro.version}</version> <classifier>${avro.mapred.classifier}</classifier> + <scope>${hive.deps.scope}</scope> <exclusions> <exclusion> <groupId>io.netty</groupId> @@ -684,6 +729,7 @@ <groupId>net.java.dev.jets3t</groupId> <artifactId>jets3t</artifactId> <version>${jets3t.version}</version> + <scope>${hadoop.deps.scope}</scope> <exclusions> <exclusion> <groupId>commons-logging</groupId> @@ -695,6 +741,7 @@ <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-yarn-api</artifactId> <version>${yarn.version}</version> + <scope>${hadoop.deps.scope}</scope> <exclusions> <exclusion> <groupId>javax.servlet</groupId> @@ -722,6 +769,7 @@ <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-yarn-common</artifactId> <version>${yarn.version}</version> + <scope>${hadoop.deps.scope}</scope> <exclusions> <exclusion> <groupId>asm</groupId> @@ -778,6 +826,7 @@ <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-yarn-server-web-proxy</artifactId> <version>${yarn.version}</version> + <scope>${hadoop.deps.scope}</scope> <exclusions> <exclusion> <groupId>asm</groupId> @@ -805,6 +854,7 @@ <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-yarn-client</artifactId> <version>${yarn.version}</version> + <scope>${hadoop.deps.scope}</scope> <exclusions> <exclusion> <groupId>asm</groupId> @@ -829,15 +879,126 @@ </exclusions> </dependency> <dependency> - <!-- Matches the versions of jackson-mapper-asl and 
jackson-core-asl with avro --> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-mapper-asl</artifactId> - <version>${jackson.version}</version> + <groupId>org.apache.zookeeper</groupId> + <artifactId>zookeeper</artifactId> + <version>${zookeeper.version}</version> + <scope>${hadoop.deps.scope}</scope> </dependency> <dependency> <groupId>org.codehaus.jackson</groupId> <artifactId>jackson-core-asl</artifactId> - <version>${jackson.version}</version> + <version>${codehaus.jackson.version}</version> + <scope>${hadoop.deps.scope}</scope> + </dependency> + <dependency> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-mapper-asl</artifactId> + <version>${codehaus.jackson.version}</version> + <scope>${hadoop.deps.scope}</scope> + </dependency> + <dependency> + <groupId>${hive.group}</groupId> + <artifactId>hive-beeline</artifactId> + <version>${hive.version}</version> + <scope>${hive.deps.scope}</scope> + </dependency> + <dependency> + <groupId>${hive.group}</groupId> + <artifactId>hive-cli</artifactId> + <version>${hive.version}</version> + <scope>${hive.deps.scope}</scope> + </dependency> + <dependency> + <groupId>${hive.group}</groupId> + <artifactId>hive-exec</artifactId> + <version>${hive.version}</version> + <scope>${hive.deps.scope}</scope> + <exclusions> + <exclusion> + <groupId>commons-logging</groupId> + <artifactId>commons-logging</artifactId> + </exclusion> + <exclusion> + <groupId>com.esotericsoftware.kryo</groupId> + <artifactId>kryo</artifactId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>${hive.group}</groupId> + <artifactId>hive-jdbc</artifactId> + <version>${hive.version}</version> + <scope>${hive.deps.scope}</scope> + </dependency> + <dependency> + <groupId>${hive.group}</groupId> + <artifactId>hive-metastore</artifactId> + <version>${hive.version}</version> + <scope>${hive.deps.scope}</scope> + </dependency> + <dependency> + <groupId>${hive.group}</groupId> + <artifactId>hive-serde</artifactId> 
+ <version>${hive.version}</version> + <scope>${hive.deps.scope}</scope> + <exclusions> + <exclusion> + <groupId>commons-logging</groupId> + <artifactId>commons-logging</artifactId> + </exclusion> + <exclusion> + <groupId>commons-logging</groupId> + <artifactId>commons-logging-api</artifactId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>com.twitter</groupId> + <artifactId>parquet-column</artifactId> + <version>${parquet.version}</version> + <scope>${parquet.deps.scope}</scope> + </dependency> + <dependency> + <groupId>com.twitter</groupId> + <artifactId>parquet-hadoop</artifactId> + <version>${parquet.version}</version> + <scope>${parquet.deps.scope}</scope> + </dependency> + <dependency> + <groupId>org.apache.flume</groupId> + <artifactId>flume-ng-core</artifactId> + <version>${flume.version}</version> + <scope>${flume.deps.scope}</scope> + <exclusions> + <exclusion> + <groupId>io.netty</groupId> + <artifactId>netty</artifactId> + </exclusion> + <exclusion> + <groupId>org.apache.thrift</groupId> + <artifactId>libthrift</artifactId> + </exclusion> + <exclusion> + <groupId>org.mortbay.jetty</groupId> + <artifactId>servlet-api</artifactId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>org.apache.flume</groupId> + <artifactId>flume-ng-sdk</artifactId> + <version>${flume.version}</version> + <scope>${flume.deps.scope}</scope> + <exclusions> + <exclusion> + <groupId>io.netty</groupId> + <artifactId>netty</artifactId> + </exclusion> + <exclusion> + <groupId>org.apache.thrift</groupId> + <artifactId>libthrift</artifactId> + </exclusion> + </exclusions> </dependency> </dependencies> </dependencyManagement> @@ -914,6 +1075,7 @@ <jvmArg>-Xmx1024m</jvmArg> <jvmArg>-XX:PermSize=${PermGen}</jvmArg> <jvmArg>-XX:MaxPermSize=${MaxPermGen}</jvmArg> + <jvmArg>-XX:ReservedCodeCacheSize=${CodeCacheSize}</jvmArg> </jvmArgs> <javacArgs> <javacArg>-source</javacArg> @@ -980,15 +1142,21 @@ 
<reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory> <junitxml>.</junitxml> <filereports>SparkTestSuite.txt</filereports> - <argLine>-ea -Xmx3g -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=512m</argLine> + <argLine>-ea -Xmx3g -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=${CodeCacheSize}</argLine> <stderr/> + <environmentVariables> + <!-- + Setting SPARK_DIST_CLASSPATH is a simple way to make sure any child processes + launched by the tests have access to the correct test-time classpath. + --> + <SPARK_DIST_CLASSPATH>${test_classpath}</SPARK_DIST_CLASSPATH> + </environmentVariables> <systemProperties> <java.awt.headless>true</java.awt.headless> - <spark.test.home>${session.executionRootDirectory}</spark.test.home> + <spark.test.home>${spark.test.home}</spark.test.home> <spark.testing>1</spark.testing> <spark.ui.enabled>false</spark.ui.enabled> <spark.ui.showConsoleProgress>false</spark.ui.showConsoleProgress> - <spark.executor.extraClassPath>${test_classpath}</spark.executor.extraClassPath> <spark.driver.allowMultipleContexts>true</spark.driver.allowMultipleContexts> </systemProperties> </configuration> @@ -1013,11 +1181,6 @@ </plugin> <plugin> <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-shade-plugin</artifactId> - <version>2.2</version> - </plugin> - <plugin> - <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-source-plugin</artifactId> <version>2.2.1</version> <configuration> @@ -1104,6 +1267,7 @@ <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-shade-plugin</artifactId> + <version>2.2</version> <configuration> <shadedArtifactAttached>false</shadedArtifactAttached> <artifactSet> @@ -1373,53 +1537,6 @@ </dependencies> </profile> - <!-- Build without Hadoop dependencies that are included in some runtime environments. 
--> - <profile> - <id>hadoop-provided</id> - <dependencies> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-client</artifactId> - <scope>provided</scope> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-api</artifactId> - <scope>provided</scope> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-common</artifactId> - <scope>provided</scope> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-server-web-proxy</artifactId> - <scope>provided</scope> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-client</artifactId> - <scope>provided</scope> - </dependency> - <dependency> - <groupId>org.apache.avro</groupId> - <artifactId>avro</artifactId> - <scope>provided</scope> - </dependency> - <dependency> - <groupId>org.apache.avro</groupId> - <artifactId>avro-ipc</artifactId> - <scope>provided</scope> - </dependency> - <dependency> - <groupId>org.apache.zookeeper</groupId> - <artifactId>zookeeper</artifactId> - <version>${zookeeper.version}</version> - <scope>provided</scope> - </dependency> - </dependencies> - </profile> <profile> <id>hive-thriftserver</id> <modules> @@ -1472,5 +1589,25 @@ </properties> </profile> + <!-- + These empty profiles are available in some sub-modules. Declare them here so that + maven does not complain when they're provided on the command line for a sub-module + that does not have them. + --> + <profile> + <id>flume-provided</id> + </profile> + <profile> + <id>hadoop-provided</id> + </profile> + <profile> + <id>hbase-provided</id> + </profile> + <profile> + <id>hive-provided</id> + </profile> + <profile> + <id>parquet-provided</id> + </profile> </profiles> </project> |