From 0c1985b153a2dc2c891ae61c1ee67506926384ae Mon Sep 17 00:00:00 2001
From: Patrick Wendell
Date: Tue, 10 Sep 2013 21:38:22 -0700
Subject: Fix HDFS access bug with assembly build.

Due to this change in HDFS:
https://issues.apache.org/jira/browse/HADOOP-7549

there is a bug when using the new assembly builds. The symptom is that any HDFS
access results in an exception saying "No filesystem for scheme 'hdfs'". This
adds a merge strategy in the assembly build which fixes the problem.
---
 assembly/pom.xml         | 6 ++++++
 project/SparkBuild.scala | 1 +
 2 files changed, 7 insertions(+)

diff --git a/assembly/pom.xml b/assembly/pom.xml
index d19f44d292..808a829e19 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -103,6 +103,12 @@
 shade
+
+
+
+ META-INF/services/org.apache.hadoop.fs.FileSystem
+
+
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index a60b553b5a..0736ec3ab7 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -303,6 +303,7 @@ object SparkBuild extends Build {
     mergeStrategy in assembly := {
       case m if m.toLowerCase.endsWith("manifest.mf") => MergeStrategy.discard
       case m if m.toLowerCase.matches("meta-inf.*\\.sf$") => MergeStrategy.discard
+      case "META-INF/services/org.apache.hadoop.fs.FileSystem" => MergeStrategy.concat
       case "reference.conf" => MergeStrategy.concat
       case _ => MergeStrategy.first
     }
-- 
cgit v1.2.3