about | summary | refs | log | tree | commit | diff
path: root/project
diff options
context:
space:
mode:
author: Marcelo Vanzin <vanzin@cloudera.com> 2015-05-12 01:39:21 -0700
committer: Andrew Or <andrew@databricks.com> 2015-05-12 01:39:21 -0700
commit: 82e890fb19d6fbaffa69856eecb4699f2f8a81eb (patch)
tree: af355cf7aad98e5d27cc82d9790632b44620e0b1 /project
parent: 984787526625b4ef8a1635faf7a5ac3cb0b758b7 (diff)
download: spark-82e890fb19d6fbaffa69856eecb4699f2f8a81eb.tar.gz
spark-82e890fb19d6fbaffa69856eecb4699f2f8a81eb.tar.bz2
spark-82e890fb19d6fbaffa69856eecb4699f2f8a81eb.zip
[SPARK-7485] [BUILD] Remove pyspark files from assembly.
The sbt part of the build is hacky; it basically tricks sbt into generating the zip by using a generator, but returns an empty list for the generated files so that nothing is actually added to the assembly. Author: Marcelo Vanzin <vanzin@cloudera.com> Closes #6022 from vanzin/SPARK-7485 and squashes the following commits: 22c1e04 [Marcelo Vanzin] Remove unneeded code. 4893622 [Marcelo Vanzin] [SPARK-7485] [build] Remove pyspark files from assembly.
Diffstat (limited to 'project')
-rw-r--r--  project/SparkBuild.scala | 44
1 files changed, 3 insertions, 41 deletions
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 186345af0e..1b87e4e98b 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -168,7 +168,7 @@ object SparkBuild extends PomBuild {
/* Enable Assembly for all assembly projects */
assemblyProjects.foreach(enable(Assembly.settings))
- /* Package pyspark artifacts in the main assembly. */
+ /* Package pyspark artifacts in a separate zip file for YARN. */
enable(PySparkAssembly.settings)(assembly)
/* Enable unidoc only for the root spark project */
@@ -373,22 +373,15 @@ object PySparkAssembly {
import java.util.zip.{ZipOutputStream, ZipEntry}
lazy val settings = Seq(
- unmanagedJars in Compile += { BuildCommons.sparkHome / "python/lib/py4j-0.8.2.1-src.zip" },
// Use a resource generator to copy all .py files from python/pyspark into a managed directory
// to be included in the assembly. We can't just add "python/" to the assembly's resource dir
// list since that will copy unneeded / unwanted files.
resourceGenerators in Compile <+= resourceManaged in Compile map { outDir: File =>
val src = new File(BuildCommons.sparkHome, "python/pyspark")
-
val zipFile = new File(BuildCommons.sparkHome , "python/lib/pyspark.zip")
zipFile.delete()
zipRecursive(src, zipFile)
-
- val dst = new File(outDir, "pyspark")
- if (!dst.isDirectory()) {
- require(dst.mkdirs())
- }
- copy(src, dst)
+ Seq[File]()
}
)
@@ -416,42 +409,11 @@ object PySparkAssembly {
output.write(buf, 0, n)
}
}
+ output.closeEntry()
in.close()
}
}
- private def copy(src: File, dst: File): Seq[File] = {
- src.listFiles().flatMap { f =>
- val child = new File(dst, f.getName())
- if (f.isDirectory()) {
- child.mkdir()
- copy(f, child)
- } else if (f.getName().endsWith(".py")) {
- var in: Option[FileInputStream] = None
- var out: Option[FileOutputStream] = None
- try {
- in = Some(new FileInputStream(f))
- out = Some(new FileOutputStream(child))
-
- val bytes = new Array[Byte](1024)
- var read = 0
- while (read >= 0) {
- read = in.get.read(bytes)
- if (read > 0) {
- out.get.write(bytes, 0, read)
- }
- }
-
- Some(child)
- } finally {
- in.foreach(_.close())
- out.foreach(_.close())
- }
- } else {
- None
- }
- }
- }
}
object Unidoc {