aboutsummaryrefslogtreecommitdiff
path: root/project
diff options
context:
space:
mode:
Diffstat (limited to 'project')
-rw-r--r--project/SparkBuild.scala37
1 files changed, 35 insertions, 2 deletions
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 026855f8f6..186345af0e 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -370,6 +370,7 @@ object Assembly {
object PySparkAssembly {
import sbtassembly.Plugin._
import AssemblyKeys._
+ import java.util.zip.{ZipOutputStream, ZipEntry}
lazy val settings = Seq(
unmanagedJars in Compile += { BuildCommons.sparkHome / "python/lib/py4j-0.8.2.1-src.zip" },
@@ -377,16 +378,48 @@ object PySparkAssembly {
// to be included in the assembly. We can't just add "python/" to the assembly's resource dir
// list since that will copy unneeded / unwanted files.
resourceGenerators in Compile <+= resourceManaged in Compile map { outDir: File =>
+ val src = new File(BuildCommons.sparkHome, "python/pyspark")
+
+ val zipFile = new File(BuildCommons.sparkHome , "python/lib/pyspark.zip")
+ zipFile.delete()
+ zipRecursive(src, zipFile)
+
val dst = new File(outDir, "pyspark")
if (!dst.isDirectory()) {
require(dst.mkdirs())
}
-
- val src = new File(BuildCommons.sparkHome, "python/pyspark")
copy(src, dst)
}
)
+ private def zipRecursive(source: File, destZipFile: File) = {
+ val destOutput = new ZipOutputStream(new FileOutputStream(destZipFile))
+ addFilesToZipStream("", source, destOutput)
+ destOutput.flush()
+ destOutput.close()
+ }
+
+ private def addFilesToZipStream(parent: String, source: File, output: ZipOutputStream): Unit = {
+ if (source.isDirectory()) {
+ output.putNextEntry(new ZipEntry(parent + source.getName()))
+ for (file <- source.listFiles()) {
+ addFilesToZipStream(parent + source.getName() + File.separator, file, output)
+ }
+ } else {
+ val in = new FileInputStream(source)
+ output.putNextEntry(new ZipEntry(parent + source.getName()))
+ val buf = new Array[Byte](8192)
+ var n = 0
+ while (n != -1) {
+ n = in.read(buf)
+ if (n != -1) {
+ output.write(buf, 0, n)
+ }
+ }
+ in.close()
+ }
+ }
+
private def copy(src: File, dst: File): Seq[File] = {
src.listFiles().flatMap { f =>
val child = new File(dst, f.getName())