-rw-r--r--  core/pom.xml              |  47
-rw-r--r--  mllib/pom.xml             |  11
-rw-r--r--  project/SparkBuild.scala  |  44
-rw-r--r--  sql/core/pom.xml          |   8
-rw-r--r--  streaming/pom.xml         |   8
5 files changed, 3 insertions(+), 115 deletions(-)
diff --git a/core/pom.xml b/core/pom.xml
index fc42f48973..262a3320db 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -381,35 +381,6 @@
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
<testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
<plugins>
- <!-- Unzip py4j so we can include its files in the jar -->
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-antrun-plugin</artifactId>
- <executions>
- <execution>
- <phase>generate-resources</phase>
- <goals>
- <goal>run</goal>
- </goals>
- </execution>
- </executions>
- <configuration>
- <target>
- <unzip src="../python/lib/py4j-0.8.2.1-src.zip" dest="../python/build" />
- </target>
- </configuration>
- </plugin>
- <plugin>
- <artifactId>maven-clean-plugin</artifactId>
- <configuration>
- <filesets>
- <fileset>
- <directory>${basedir}/../python/build</directory>
- </fileset>
- </filesets>
- <verbose>true</verbose>
- </configuration>
- </plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
@@ -438,24 +409,6 @@
</executions>
</plugin>
</plugins>
-
- <resources>
- <resource>
- <directory>src/main/resources</directory>
- </resource>
- <resource>
- <directory>../python</directory>
- <includes>
- <include>pyspark/*.py</include>
- </includes>
- </resource>
- <resource>
- <directory>../python/build</directory>
- <includes>
- <include>py4j/*.py</include>
- </includes>
- </resource>
- </resources>
</build>
<profiles>
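
The antrun execution deleted above did nothing more than expand the bundled py4j sources into python/build so they could be swept up as jar resources; the maven-clean-plugin block below it only removed that directory again on clean. A minimal Scala sketch of the removed unzip step, assuming the paths from the deleted XML (relative to core/, hence the "../" prefixes) and a hypothetical UnzipPy4j wrapper:

    import java.io.{File, FileOutputStream}
    import java.util.zip.ZipFile
    import scala.collection.JavaConverters._

    object UnzipPy4j {
      // Expand every entry of the source zip under dest, creating parent dirs.
      def unzip(src: File, dest: File): Unit = {
        val zip = new ZipFile(src)
        for (entry <- zip.entries().asScala) {
          val out = new File(dest, entry.getName)
          if (entry.isDirectory) {
            out.mkdirs()
          } else {
            out.getParentFile.mkdirs()
            val in = zip.getInputStream(entry)
            val os = new FileOutputStream(out)
            val buf = new Array[Byte](8192)
            var n = in.read(buf)
            while (n != -1) {
              os.write(buf, 0, n)
              n = in.read(buf)
            }
            os.close()
            in.close()
          }
        }
        zip.close()
      }

      def main(args: Array[String]): Unit =
        // Same src/dest as the deleted <unzip> target.
        unzip(new File("../python/lib/py4j-0.8.2.1-src.zip"), new File("../python/build"))
    }

With this patch the step disappears entirely: py4j stays in its original zip under python/lib, presumably put on the Python path as-is rather than being repackaged into the assembly.
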
diff --git a/mllib/pom.xml b/mllib/pom.xml
index a3c57ae260..0c07ca1a62 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -141,16 +141,5 @@
<build>
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
<testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
- <resources>
- <resource>
- <directory>../python</directory>
- <includes>
- <include>pyspark/mllib/*.py</include>
- <include>pyspark/mllib/stat/*.py</include>
- <include>pyspark/ml/*.py</include>
- <include>pyspark/ml/param/*.py</include>
- </includes>
- </resource>
- </resources>
</build>
</project>
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 186345af0e..1b87e4e98b 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -168,7 +168,7 @@ object SparkBuild extends PomBuild {
/* Enable Assembly for all assembly projects */
assemblyProjects.foreach(enable(Assembly.settings))
- /* Package pyspark artifacts in the main assembly. */
+ /* Package pyspark artifacts in a separate zip file for YARN. */
enable(PySparkAssembly.settings)(assembly)
/* Enable unidoc only for the root spark project */
@@ -373,22 +373,15 @@ object PySparkAssembly {
import java.util.zip.{ZipOutputStream, ZipEntry}
lazy val settings = Seq(
- unmanagedJars in Compile += { BuildCommons.sparkHome / "python/lib/py4j-0.8.2.1-src.zip" },
// Use a resource generator to copy all .py files from python/pyspark into a managed directory
// to be included in the assembly. We can't just add "python/" to the assembly's resource dir
// list since that will copy unneeded / unwanted files.
resourceGenerators in Compile <+= resourceManaged in Compile map { outDir: File =>
val src = new File(BuildCommons.sparkHome, "python/pyspark")
-
val zipFile = new File(BuildCommons.sparkHome , "python/lib/pyspark.zip")
zipFile.delete()
zipRecursive(src, zipFile)
-
- val dst = new File(outDir, "pyspark")
- if (!dst.isDirectory()) {
- require(dst.mkdirs())
- }
- copy(src, dst)
+ Seq[File]()
}
)
@@ -416,42 +409,11 @@ object PySparkAssembly {
output.write(buf, 0, n)
}
}
+ output.closeEntry()
in.close()
}
}
- private def copy(src: File, dst: File): Seq[File] = {
- src.listFiles().flatMap { f =>
- val child = new File(dst, f.getName())
- if (f.isDirectory()) {
- child.mkdir()
- copy(f, child)
- } else if (f.getName().endsWith(".py")) {
- var in: Option[FileInputStream] = None
- var out: Option[FileOutputStream] = None
- try {
- in = Some(new FileInputStream(f))
- out = Some(new FileOutputStream(child))
-
- val bytes = new Array[Byte](1024)
- var read = 0
- while (read >= 0) {
- read = in.get.read(bytes)
- if (read > 0) {
- out.get.write(bytes, 0, read)
- }
- }
-
- Some(child)
- } finally {
- in.foreach(_.close())
- out.foreach(_.close())
- }
- } else {
- None
- }
- }
- }
}
object Unidoc {
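
Only the tail of the zip helper survives in the hunk above: the buffered copy loop, the newly added output.closeEntry(), and in.close(). For orientation, a self-contained sketch of a recursive zipper with that same shape; the method names, entry-path handling, and buffer size here are assumptions, not the patch's exact code:

    import java.io.{File, FileInputStream, FileOutputStream}
    import java.util.zip.{ZipEntry, ZipOutputStream}

    object ZipSketch {
      // Zip the source directory into destZipFile, keeping the directory's
      // own name as the top-level entry prefix (so python/pyspark becomes
      // pyspark/... inside the archive).
      def zipRecursive(source: File, destZipFile: File): Unit = {
        val output = new ZipOutputStream(new FileOutputStream(destZipFile))
        addFilesToZipStream("", source, output)
        output.flush()
        output.close()
      }

      private def addFilesToZipStream(parent: String, source: File,
                                      output: ZipOutputStream): Unit = {
        if (source.isDirectory) {
          for (file <- source.listFiles()) {
            addFilesToZipStream(parent + source.getName + "/", file, output)
          }
        } else {
          val in = new FileInputStream(source)
          output.putNextEntry(new ZipEntry(parent + source.getName))
          val buf = new Array[Byte](8192)
          var n = in.read(buf)
          while (n != -1) {       // buffered copy loop, as in the visible fragment
            output.write(buf, 0, n)
            n = in.read(buf)
          }
          output.closeEntry()     // the call this patch adds
          in.close()
        }
      }
    }

Note that returning Seq[File]() from the resource generator (the lone "+" line in the second hunk) is what actually drops the .py files from the assembly: the generator still runs for its side effect of writing python/lib/pyspark.zip, but it no longer hands sbt any files to package.
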
diff --git a/sql/core/pom.xml b/sql/core/pom.xml
index 7d274a73e0..ffe95bb491 100644
--- a/sql/core/pom.xml
+++ b/sql/core/pom.xml
@@ -103,13 +103,5 @@
<build>
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
<testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
- <resources>
- <resource>
- <directory>../../python</directory>
- <includes>
- <include>pyspark/sql/*.py</include>
- </includes>
- </resource>
- </resources>
</build>
</project>
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 5ca55a4f68..5ab7f4472c 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -105,13 +105,5 @@
</configuration>
</plugin>
</plugins>
- <resources>
- <resource>
- <directory>../python</directory>
- <includes>
- <include>pyspark/streaming/*.py</include>
- </includes>
- </resource>
- </resources>
</build>
</project>
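
After the patch, none of the modules ship Python sources inside their jars; everything PySpark needs is expected to live in python/lib/pyspark.zip (plus the untouched py4j zip). A quick, hypothetical way to eyeball the generated archive's layout, using only the JDK's ZipFile (the object name is made up):

    import java.util.zip.ZipFile
    import scala.collection.JavaConverters._

    object InspectPySparkZip {
      def main(args: Array[String]): Unit = {
        // List every entry so the pyspark/, pyspark/sql/, pyspark/mllib/ ...
        // layout formerly spread across the module poms can be checked.
        val zip = new ZipFile("python/lib/pyspark.zip")
        zip.entries().asScala.map(_.getName).toSeq.sorted.foreach(println)
        zip.close()
      }
    }
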