aboutsummaryrefslogtreecommitdiff
path: root/examples
diff options
context:
space:
mode:
authorMarcelo Vanzin <vanzin@cloudera.com>2015-01-08 17:15:13 -0800
committerPatrick Wendell <pwendell@gmail.com>2015-01-08 17:15:13 -0800
commit48cecf673c38ead56afa2dea49d295165c67cdf4 (patch)
tree67d1a63293ecbac130ea608d08a46e5489922b3b /examples
parentc9c8b219ad81c4c30bc1598ff35b01f964570c29 (diff)
downloadspark-48cecf673c38ead56afa2dea49d295165c67cdf4.tar.gz
spark-48cecf673c38ead56afa2dea49d295165c67cdf4.tar.bz2
spark-48cecf673c38ead56afa2dea49d295165c67cdf4.zip
[SPARK-4048] Enhance and extend hadoop-provided profile.
This change does a few things to make the hadoop-provided profile more useful: - Create new profiles for other libraries / services that might be provided by the infrastructure - Simplify and fix the poms so that the profiles are only activated while building assemblies. - Fix tests so that they're able to run when the profiles are activated - Add a new env variable to be used by distributions that use these profiles to provide the runtime classpath for Spark jobs and daemons. Author: Marcelo Vanzin <vanzin@cloudera.com> Closes #2982 from vanzin/SPARK-4048 and squashes the following commits: 82eb688 [Marcelo Vanzin] Add a comment. eb228c0 [Marcelo Vanzin] Fix borked merge. 4e38f4e [Marcelo Vanzin] Merge branch 'master' into SPARK-4048 9ef79a3 [Marcelo Vanzin] Alternative way to propagate test classpath to child processes. 371ebee [Marcelo Vanzin] Review feedback. 52f366d [Marcelo Vanzin] Merge branch 'master' into SPARK-4048 83099fc [Marcelo Vanzin] Merge branch 'master' into SPARK-4048 7377e7b [Marcelo Vanzin] Merge branch 'master' into SPARK-4048 322f882 [Marcelo Vanzin] Fix merge fail. f24e9e7 [Marcelo Vanzin] Merge branch 'master' into SPARK-4048 8b00b6a [Marcelo Vanzin] Merge branch 'master' into SPARK-4048 9640503 [Marcelo Vanzin] Cleanup child process log message. 115fde5 [Marcelo Vanzin] Simplify a comment (and make it consistent with another pom). e3ab2da [Marcelo Vanzin] Fix hive-thriftserver profile. 7820d58 [Marcelo Vanzin] Fix CliSuite with provided profiles. 1be73d4 [Marcelo Vanzin] Restore flume-provided profile. d1399ed [Marcelo Vanzin] Restore jetty dependency. 82a54b9 [Marcelo Vanzin] Remove unused profile. 5c54a25 [Marcelo Vanzin] Fix HiveThriftServer2Suite with *-provided profiles. 1fc4d0b [Marcelo Vanzin] Update dependencies for hive-thriftserver. f7b3bbe [Marcelo Vanzin] Add snappy to hadoop-provided list. 9e4e001 [Marcelo Vanzin] Remove duplicate hive profile. d928d62 [Marcelo Vanzin] Redirect child stderr to parent's log. 4d67469 [Marcelo Vanzin] Propagate SPARK_DIST_CLASSPATH on Yarn. 417d90e [Marcelo Vanzin] Introduce "SPARK_DIST_CLASSPATH". 2f95f0d [Marcelo Vanzin] Propagate classpath to child processes during testing. 1adf91c [Marcelo Vanzin] Re-enable maven-install-plugin for a few projects. 284dda6 [Marcelo Vanzin] Rework the "hadoop-provided" profile, add new ones.
Diffstat (limited to 'examples')
-rw-r--r--examples/pom.xml361
1 files changed, 199 insertions, 162 deletions
diff --git a/examples/pom.xml b/examples/pom.xml
index bdc5d0562f..002d4458c4 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -98,143 +98,145 @@
<version>${project.version}</version>
</dependency>
<dependency>
- <groupId>org.eclipse.jetty</groupId>
- <artifactId>jetty-server</artifactId>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-testing-util</artifactId>
+ <version>${hbase.version}</version>
+ <scope>${hbase.deps.scope}</scope>
+ <exclusions>
+ <exclusion>
+ <!-- SPARK-4455 -->
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-annotations</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.jruby</groupId>
+ <artifactId>jruby-complete</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-protocol</artifactId>
+ <version>${hbase.version}</version>
+ <scope>${hbase.deps.scope}</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-common</artifactId>
+ <version>${hbase.version}</version>
+ <scope>${hbase.deps.scope}</scope>
+ <exclusions>
+ <exclusion>
+ <!-- SPARK-4455 -->
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-annotations</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-client</artifactId>
+ <version>${hbase.version}</version>
+ <scope>${hbase.deps.scope}</scope>
+ <exclusions>
+ <exclusion>
+ <!-- SPARK-4455 -->
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-annotations</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>io.netty</groupId>
+ <artifactId>netty</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-server</artifactId>
+ <version>${hbase.version}</version>
+ <scope>${hbase.deps.scope}</scope>
+ <exclusions>
+ <exclusion>
+ <!-- SPARK-4455 -->
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-annotations</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-core</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-client</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-core</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-auth</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-annotations</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-hdfs</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop1-compat</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-math</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-core</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-server</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-core</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-json</artifactId>
+ </exclusion>
+ <exclusion>
+ <!-- hbase uses v2.4, which is better, but ...-->
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop-compat</artifactId>
+ <version>${hbase.version}</version>
+ <scope>${hbase.deps.scope}</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase-hadoop-compat</artifactId>
+ <version>${hbase.version}</version>
+ <type>test-jar</type>
+ <scope>test</scope>
</dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-testing-util</artifactId>
- <version>${hbase.version}</version>
- <exclusions>
- <exclusion>
- <!-- SPARK-4455 -->
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-annotations</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.jruby</groupId>
- <artifactId>jruby-complete</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-protocol</artifactId>
- <version>${hbase.version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-common</artifactId>
- <version>${hbase.version}</version>
- <exclusions>
- <exclusion>
- <!-- SPARK-4455 -->
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-annotations</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-client</artifactId>
- <version>${hbase.version}</version>
- <exclusions>
- <exclusion>
- <!-- SPARK-4455 -->
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-annotations</artifactId>
- </exclusion>
- <exclusion>
- <groupId>io.netty</groupId>
- <artifactId>netty</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-server</artifactId>
- <version>${hbase.version}</version>
- <exclusions>
- <exclusion>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-core</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-client</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-mapreduce-client-core</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-auth</artifactId>
- </exclusion>
- <exclusion>
- <!-- SPARK-4455 -->
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-annotations</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-annotations</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-hdfs</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-hadoop1-compat</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.apache.commons</groupId>
- <artifactId>commons-math</artifactId>
- </exclusion>
- <exclusion>
- <groupId>com.sun.jersey</groupId>
- <artifactId>jersey-core</artifactId>
- </exclusion>
- <exclusion>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-api</artifactId>
- </exclusion>
- <exclusion>
- <groupId>com.sun.jersey</groupId>
- <artifactId>jersey-server</artifactId>
- </exclusion>
- <exclusion>
- <groupId>com.sun.jersey</groupId>
- <artifactId>jersey-core</artifactId>
- </exclusion>
- <exclusion>
- <groupId>com.sun.jersey</groupId>
- <artifactId>jersey-json</artifactId>
- </exclusion>
- <exclusion>
- <!-- hbase uses v2.4, which is better, but ...-->
- <groupId>commons-io</groupId>
- <artifactId>commons-io</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-hadoop-compat</artifactId>
- <version>${hbase.version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-hadoop-compat</artifactId>
- <version>${hbase.version}</version>
- <type>test-jar</type>
- <scope>test</scope>
- </dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-math3</artifactId>
@@ -308,31 +310,6 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
- <configuration>
- <shadedArtifactAttached>false</shadedArtifactAttached>
- <outputFile>${project.build.directory}/scala-${scala.binary.version}/spark-examples-${project.version}-hadoop${hadoop.version}.jar</outputFile>
- <artifactSet>
- <includes>
- <include>*:*</include>
- </includes>
- </artifactSet>
- <filters>
- <filter>
- <artifact>com.google.guava:guava</artifact>
- <excludes>
- <exclude>com/google/common/base/Optional*</exclude>
- </excludes>
- </filter>
- <filter>
- <artifact>*:*</artifact>
- <excludes>
- <exclude>META-INF/*.SF</exclude>
- <exclude>META-INF/*.DSA</exclude>
- <exclude>META-INF/*.RSA</exclude>
- </excludes>
- </filter>
- </filters>
- </configuration>
<executions>
<execution>
<phase>package</phase>
@@ -340,6 +317,34 @@
<goal>shade</goal>
</goals>
<configuration>
+ <shadedArtifactAttached>false</shadedArtifactAttached>
+ <outputFile>${project.build.directory}/scala-${scala.binary.version}/spark-examples-${project.version}-hadoop${hadoop.version}.jar</outputFile>
+ <artifactSet>
+ <includes>
+ <include>*:*</include>
+ </includes>
+ </artifactSet>
+ <filters>
+ <filter>
+ <artifact>com.google.guava:guava</artifact>
+ <excludes>
+ <!--
+ Exclude all Guava classes so they're picked up from the main assembly. The
+ dependency still needs to be compile-scoped so that the relocation below
+ works.
+ -->
+ <exclude>**</exclude>
+ </excludes>
+ </filter>
+ <filter>
+ <artifact>*:*</artifact>
+ <excludes>
+ <exclude>META-INF/*.SF</exclude>
+ <exclude>META-INF/*.DSA</exclude>
+ <exclude>META-INF/*.RSA</exclude>
+ </excludes>
+ </filter>
+ </filters>
<relocations>
<relocation>
<pattern>com.google</pattern>
@@ -411,7 +416,7 @@
</properties>
</profile>
<profile>
- <!-- We add a source directory specific to Scala 2.10 since Kafka
+ <!-- We add a source directory specific to Scala 2.10 since Kafka
only works with it -->
<id>scala-2.10</id>
<activation>
@@ -449,5 +454,37 @@
</plugins>
</build>
</profile>
+
+ <!-- Profiles that disable inclusion of certain dependencies. -->
+ <profile>
+ <id>flume-provided</id>
+ <properties>
+ <flume.deps.scope>provided</flume.deps.scope>
+ </properties>
+ </profile>
+ <profile>
+ <id>hadoop-provided</id>
+ <properties>
+ <hadoop.deps.scope>provided</hadoop.deps.scope>
+ </properties>
+ </profile>
+ <profile>
+ <id>hbase-provided</id>
+ <properties>
+ <hbase.deps.scope>provided</hbase.deps.scope>
+ </properties>
+ </profile>
+ <profile>
+ <id>hive-provided</id>
+ <properties>
+ <hive.deps.scope>provided</hive.deps.scope>
+ </properties>
+ </profile>
+ <profile>
+ <id>parquet-provided</id>
+ <properties>
+ <parquet.deps.scope>provided</parquet.deps.scope>
+ </properties>
+ </profile>
</profiles>
</project>