author | Marcelo Vanzin <vanzin@cloudera.com> | 2015-05-12 01:39:21 -0700 |
---|---|---|
committer | Andrew Or <andrew@databricks.com> | 2015-05-12 01:39:28 -0700 |
commit | afe54b76a69fcbc18d37db969de6088847329de6 (patch) | |
tree | bb620a58d8ef84610ba8ccb9d7139c7de8db1737 /project | |
parent | 4092a2e859bc4dbb8c62d37f6937ecf235fca9a9 (diff) | |
[SPARK-7485] [BUILD] Remove pyspark files from assembly.
The sbt part of the build is hacky; it basically tricks sbt
into generating the zip by using a generator, but returns
an empty list for the generated files so that nothing is
actually added to the assembly.
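As a rough illustration of that trick, here is a minimal sketch using sbt 0.13-style keys; the `pysparkZipSketch` name, the `sparkHomeDir` stand-in, and the use of sbt's `IO.zip` are assumptions for illustration, not the actual Spark build code. The generator runs purely for its side effect of writing the zip and reports no generated files, so the assembly picks up nothing:

```scala
import sbt._
import sbt.Keys._

// Sketch: build python/lib/pyspark.zip as a side effect of a resource
// generator, then return an empty Seq so nothing is added to the assembly.
lazy val pysparkZipSketch: Seq[Setting[_]] = Seq(
  resourceGenerators in Compile <+= resourceManaged in Compile map { _: File =>
    val sparkHomeDir = file(sys.props("user.dir"))    // assumed stand-in for the real Spark home
    val src = sparkHomeDir / "python" / "pyspark"
    val zipFile = sparkHomeDir / "python" / "lib" / "pyspark.zip"
    zipFile.delete()
    IO.zip(Path.allSubpaths(src), zipFile)            // sbt's built-in zip helper
    Seq.empty[File]                                   // report nothing, so the assembly stays empty
  }
)
```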
Author: Marcelo Vanzin <vanzin@cloudera.com>
Closes #6022 from vanzin/SPARK-7485 and squashes the following commits:
22c1e04 [Marcelo Vanzin] Remove unneeded code.
4893622 [Marcelo Vanzin] [SPARK-7485] [build] Remove pyspark files from assembly.
(cherry picked from commit 82e890fb19d6fbaffa69856eecb4699f2f8a81eb)
Signed-off-by: Andrew Or <andrew@databricks.com>
Diffstat (limited to 'project')
-rw-r--r-- | project/SparkBuild.scala | 44 |
1 file changed, 3 insertions, 41 deletions
```diff
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 186345af0e..1b87e4e98b 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -168,7 +168,7 @@ object SparkBuild extends PomBuild {
   /* Enable Assembly for all assembly projects */
   assemblyProjects.foreach(enable(Assembly.settings))
 
-  /* Package pyspark artifacts in the main assembly. */
+  /* Package pyspark artifacts in a separate zip file for YARN. */
   enable(PySparkAssembly.settings)(assembly)
 
   /* Enable unidoc only for the root spark project */
@@ -373,22 +373,15 @@ object PySparkAssembly {
   import java.util.zip.{ZipOutputStream, ZipEntry}
 
   lazy val settings = Seq(
-    unmanagedJars in Compile += { BuildCommons.sparkHome / "python/lib/py4j-0.8.2.1-src.zip" },
     // Use a resource generator to copy all .py files from python/pyspark into a managed directory
     // to be included in the assembly. We can't just add "python/" to the assembly's resource dir
     // list since that will copy unneeded / unwanted files.
     resourceGenerators in Compile <+= resourceManaged in Compile map { outDir: File =>
       val src = new File(BuildCommons.sparkHome, "python/pyspark")
-
       val zipFile = new File(BuildCommons.sparkHome , "python/lib/pyspark.zip")
       zipFile.delete()
       zipRecursive(src, zipFile)
-
-      val dst = new File(outDir, "pyspark")
-      if (!dst.isDirectory()) {
-        require(dst.mkdirs())
-      }
-      copy(src, dst)
+      Seq[File]()
     }
   )
 
@@ -416,42 +409,11 @@ object PySparkAssembly {
           output.write(buf, 0, n)
         }
       }
+      output.closeEntry()
       in.close()
     }
   }
 
-  private def copy(src: File, dst: File): Seq[File] = {
-    src.listFiles().flatMap { f =>
-      val child = new File(dst, f.getName())
-      if (f.isDirectory()) {
-        child.mkdir()
-        copy(f, child)
-      } else if (f.getName().endsWith(".py")) {
-        var in: Option[FileInputStream] = None
-        var out: Option[FileOutputStream] = None
-        try {
-          in = Some(new FileInputStream(f))
-          out = Some(new FileOutputStream(child))
-
-          val bytes = new Array[Byte](1024)
-          var read = 0
-          while (read >= 0) {
-            read = in.get.read(bytes)
-            if (read > 0) {
-              out.get.write(bytes, 0, read)
-            }
-          }
-
-          Some(child)
-        } finally {
-          in.foreach(_.close())
-          out.foreach(_.close())
-        }
-      } else {
-        None
-      }
-    }
-  }
 }
 
 object Unidoc {
```
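The `output.closeEntry()` added in the last hunk follows the usual `java.util.zip.ZipOutputStream` pattern of finishing each entry after its bytes are written, before the next `putNextEntry` call. A small standalone sketch of that pattern follows; the `addToZip` helper and the paths are hypothetical, not Spark code, and directory entries are simply recursed into rather than written:

```scala
import java.io.{File, FileInputStream, FileOutputStream}
import java.util.zip.{ZipEntry, ZipOutputStream}

// Hypothetical helper: recursively add a directory's files to an open
// ZipOutputStream, closing each entry after its bytes are written.
def addToZip(parent: String, source: File, output: ZipOutputStream): Unit = {
  if (source.isDirectory) {
    source.listFiles().foreach { f =>
      addToZip(parent + source.getName + File.separator, f, output)
    }
  } else {
    output.putNextEntry(new ZipEntry(parent + source.getName))
    val in = new FileInputStream(source)
    try {
      val buf = new Array[Byte](8192)
      var n = in.read(buf)
      while (n != -1) {
        output.write(buf, 0, n)
        n = in.read(buf)
      }
    } finally {
      in.close()
    }
    output.closeEntry()  // finish this entry before the next putNextEntry
  }
}

// Usage sketch: zip the contents of python/pyspark into pyspark.zip.
val out = new ZipOutputStream(new FileOutputStream(new File("pyspark.zip")))
try addToZip("", new File("python/pyspark"), out) finally out.close()
```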