Diffstat (limited to 'assembly/pom.xml')
-rw-r--r--  assembly/pom.xml  101
1 file changed, 31 insertions, 70 deletions
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 477d4931c3..22cbac06ca 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -33,9 +33,8 @@
<properties>
<sbt.project.name>assembly</sbt.project.name>
- <spark.jar.dir>scala-${scala.binary.version}</spark.jar.dir>
- <spark.jar.basename>spark-assembly-${project.version}-hadoop${hadoop.version}.jar</spark.jar.basename>
- <spark.jar>${project.build.directory}/${spark.jar.dir}/${spark.jar.basename}</spark.jar>
+ <build.testJarPhase>none</build.testJarPhase>
+ <build.copyDependenciesPhase>package</build.copyDependenciesPhase>
</properties>
<dependencies>
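The two new properties are placeholders for lifecycle phases consumed by plugin executions defined higher up the build; binding an inherited execution to the non-existent phase "none" is the standard Maven trick for disabling it in a single module. A minimal sketch of how build.testJarPhase could be wired up in a parent POM — the execution id and the surrounding plugin block are illustrative assumptions, not quoted from this patch:

  <plugin>
    <groupId>org.apache.maven.plugins</groupId>
    <artifactId>maven-jar-plugin</artifactId>
    <executions>
      <execution>
        <!-- Hypothetical id; the phase is whatever each module sets build.testJarPhase to. -->
        <id>prepare-test-jar</id>
        <phase>${build.testJarPhase}</phase>
        <goals>
          <goal>test-jar</goal>
        </goals>
      </execution>
    </executions>
  </plugin>

With build.testJarPhase set to "none" here, the assembly module simply skips producing a test jar, while other modules keep the inherited behavior.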
@@ -69,6 +68,17 @@
<artifactId>spark-repl_${scala.binary.version}</artifactId>
<version>${project.version}</version>
</dependency>
+
+ <!--
+ Because we don't shade dependencies anymore, we need to restore Guava to compile scope so
+ that the libraries Spark depends on have it available. We'll package the version that Spark
+ uses (14.0.1), which is not the same version the Hadoop dependencies use, but it works.
+ -->
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ <scope>${hadoop.deps.scope}</scope>
+ </dependency>
</dependencies>
<build>
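Scoping Guava with ${hadoop.deps.scope} lets the same declaration serve both self-contained builds and builds where Hadoop and its transitive dependencies are supplied by the cluster: the property can default to compile and be flipped to provided by a profile, keeping the jar out of the packaged dependencies. A sketch of such a setup, assuming the default value and the profile name (hadoop-provided) follow the usual convention rather than being taken from this patch:

  <properties>
    <!-- Assumed default: dependencies covered by this property are bundled. -->
    <hadoop.deps.scope>compile</hadoop.deps.scope>
  </properties>

  <profiles>
    <profile>
      <!-- Hypothetical profile name; activating it leaves these jars to the cluster. -->
      <id>hadoop-provided</id>
      <properties>
        <hadoop.deps.scope>provided</hadoop.deps.scope>
      </properties>
    </profile>
  </profiles>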
@@ -87,75 +97,26 @@
<skip>true</skip>
</configuration>
</plugin>
- <!-- zip pyspark archives to run python applications in yarn mode -->
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-antrun-plugin</artifactId>
- <executions>
- <execution>
- <phase>package</phase>
- <goals>
- <goal>run</goal>
- </goals>
- </execution>
- </executions>
- <configuration>
- <target>
- <delete dir="${basedir}/../python/lib/pyspark.zip"/>
- <zip destfile="${basedir}/../python/lib/pyspark.zip">
- <fileset dir="${basedir}/../python/" includes="pyspark/**/*"/>
- </zip>
- </target>
- </configuration>
- </plugin>
- <!-- Use the shade plugin to create a big JAR with all the dependencies -->
+ <!-- zip pyspark archives to run python applications in yarn mode -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-shade-plugin</artifactId>
- <configuration>
- <shadedArtifactAttached>false</shadedArtifactAttached>
- <outputFile>${spark.jar}</outputFile>
- <artifactSet>
- <includes>
- <include>*:*</include>
- </includes>
- </artifactSet>
- <filters>
- <filter>
- <artifact>*:*</artifact>
- <excludes>
- <exclude>org/datanucleus/**</exclude>
- <exclude>META-INF/*.SF</exclude>
- <exclude>META-INF/*.DSA</exclude>
- <exclude>META-INF/*.RSA</exclude>
- </excludes>
- </filter>
- </filters>
- </configuration>
- <executions>
- <execution>
- <phase>package</phase>
- <goals>
- <goal>shade</goal>
- </goals>
- <configuration>
- <transformers>
- <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
- <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
- <resource>META-INF/services/org.apache.hadoop.fs.FileSystem</resource>
- </transformer>
- <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
- <resource>reference.conf</resource>
- </transformer>
- <transformer implementation="org.apache.maven.plugins.shade.resource.DontIncludeResourceTransformer">
- <resource>log4j.properties</resource>
- </transformer>
- <transformer implementation="org.apache.maven.plugins.shade.resource.ApacheLicenseResourceTransformer"/>
- <transformer implementation="org.apache.maven.plugins.shade.resource.ApacheNoticeResourceTransformer"/>
- </transformers>
- </configuration>
- </execution>
- </executions>
+ <artifactId>maven-antrun-plugin</artifactId>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>run</goal>
+ </goals>
+ </execution>
+ </executions>
+ <configuration>
+ <target>
+ <delete dir="${basedir}/../python/lib/pyspark.zip"/>
+ <zip destfile="${basedir}/../python/lib/pyspark.zip">
+ <fileset dir="${basedir}/../python/" includes="pyspark/**/*"/>
+ </zip>
+ </target>
+ </configuration>
</plugin>
</plugins>
</build>
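With the shade plugin removed, nothing merges META-INF/services entries or reference.conf anymore; that is only safe because the dependencies now stay as separate jars instead of being flattened into a single spark-assembly jar. The copying itself would be handled by an execution bound to ${build.copyDependenciesPhase}, which this module sets to package. A minimal sketch of such an execution using maven-dependency-plugin — the execution id, output directory, and exclusions are illustrative assumptions, not quoted from this patch:

  <plugin>
    <groupId>org.apache.maven.plugins</groupId>
    <artifactId>maven-dependency-plugin</artifactId>
    <executions>
      <execution>
        <!-- Hypothetical id; fires at whatever phase the module picks. -->
        <id>copy-module-dependencies</id>
        <phase>${build.copyDependenciesPhase}</phase>
        <goals>
          <goal>copy-dependencies</goal>
        </goals>
        <configuration>
          <!-- Assumed layout: runtime jars land in a plain directory instead of one fat jar. -->
          <outputDirectory>${project.build.directory}/jars</outputDirectory>
          <includeScope>runtime</includeScope>
          <!-- Mirrors the old shade filter that kept DataNucleus classes out of the assembly. -->
          <excludeGroupIds>org.datanucleus</excludeGroupIds>
        </configuration>
      </execution>
    </executions>
  </plugin>

Under this layout the distribution points the classpath at the jars directory, so the signature files and log4j.properties that the old shade filters stripped never get merged into a combined artifact in the first place.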