author     Kishor Patil <kpatil@yahoo-inc.com>  2016-11-03 16:10:26 -0500
committer  Tom Graves <tgraves@yahoo-inc.com>   2016-11-03 16:10:26 -0500
commit     098e4ca9c7af61e64839a50c65be449749af6482 (patch)
tree       49d79c9bf9f5c27b34760687a62fd99838b78ecf /yarn/src/test/scala/org
parent     16293311cdb25a62733a9aae4355659b971a3ce1 (diff)
[SPARK-18099][YARN] Fail if same files added to distributed cache for --files and --archives
## What changes were proposed in this pull request?

During spark-submit, if the YARN distributed cache is instructed to add the same file under both --files and --archives, this change retains the existing Spark YARN distributed cache behaviour: it warns and fails the submission when the same file is specified in both --files and --archives.

## How was this patch tested?

Manually tested:
1. If the same jar is specified in both --jars and --files, the job still submits; the [SPARK-14423] #12203 functionality is unchanged.
2. If the same file is specified in both --files and --archives, the job fails to submit.

Author: Kishor Patil <kpatil@yahoo-inc.com>

Closes #15627 from kishorvpatil/spark18099.
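The guard behind this behaviour is simple to sketch. Below is a minimal, self-contained illustration of the idea, assuming a set-based dedup helper; the names DistCacheDedup, distributedUris, addDistributedUri, and distributeOrFail are illustrative stand-ins, not the actual Client.scala internals. The key point is that a failed Scala require(...) throws IllegalArgumentException, which is exactly the exception type the new test in ClientSuite intercepts.

```scala
import java.net.URI
import scala.collection.mutable

// Hypothetical sketch of duplicate detection for the YARN distributed cache;
// names are illustrative, not the real Client.scala internals.
object DistCacheDedup {
  private val distributedUris = new mutable.HashSet[String]()

  // Registers a URI; returns false if it was already registered.
  private def addDistributedUri(uri: URI): Boolean = {
    val uriStr = uri.toString
    if (distributedUris.contains(uriStr)) {
      false
    } else {
      distributedUris += uriStr
      true
    }
  }

  // For --files and --archives a duplicate is fatal: require(...) throws
  // IllegalArgumentException, which the test below intercepts.
  def distributeOrFail(path: String): Unit = {
    require(addDistributedUri(new URI(path)),
      s"Same path added to distributed cache more than once: $path")
  }
}
```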
Diffstat (limited to 'yarn/src/test/scala/org')
-rw-r--r--  yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala | 42
1 file changed, 42 insertions(+), 0 deletions(-)
diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala
index 0a4f291e25..06516c1baf 100644
--- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala
+++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala
@@ -282,6 +282,48 @@ class ClientSuite extends SparkFunSuite with Matchers with BeforeAndAfterAll
}
}
+ test("distribute archive multiple times") {
+ val libs = Utils.createTempDir()
+ // Create jars dir and RELEASE file to avoid IllegalStateException.
+ val jarsDir = new File(libs, "jars")
+ assert(jarsDir.mkdir())
+ new FileOutputStream(new File(libs, "RELEASE")).close()
+
+ val userLib1 = Utils.createTempDir()
+ val testJar = TestUtils.createJarWithFiles(Map(), userLib1)
+
+ // Case 1: FILES_TO_DISTRIBUTE and ARCHIVES_TO_DISTRIBUTE can't have duplicate files
+ val sparkConf = new SparkConfWithEnv(Map("SPARK_HOME" -> libs.getAbsolutePath))
+ .set(FILES_TO_DISTRIBUTE, Seq(testJar.getPath))
+ .set(ARCHIVES_TO_DISTRIBUTE, Seq(testJar.getPath))
+
+ val client = createClient(sparkConf)
+ val tempDir = Utils.createTempDir()
+ intercept[IllegalArgumentException] {
+ client.prepareLocalResources(new Path(tempDir.getAbsolutePath()), Nil)
+ }
+
+ // Case 2: FILES_TO_DISTRIBUTE can't have duplicate files.
+ val sparkConfFiles = new SparkConfWithEnv(Map("SPARK_HOME" -> libs.getAbsolutePath))
+ .set(FILES_TO_DISTRIBUTE, Seq(testJar.getPath, testJar.getPath))
+
+ val clientFiles = createClient(sparkConfFiles)
+ val tempDirForFiles = Utils.createTempDir()
+ intercept[IllegalArgumentException] {
+ clientFiles.prepareLocalResources(new Path(tempDirForFiles.getAbsolutePath()), Nil)
+ }
+
+ // Case 3: ARCHIVES_TO_DISTRIBUTE can't have duplicate files.
+ val sparkConfArchives = new SparkConfWithEnv(Map("SPARK_HOME" -> libs.getAbsolutePath))
+ .set(ARCHIVES_TO_DISTRIBUTE, Seq(testJar.getPath, testJar.getPath))
+
+ val clientArchives = createClient(sparkConfArchives)
+ val tempDirForArchives = Utils.createTempDir()
+ intercept[IllegalArgumentException] {
+ clientArchives.prepareLocalResources(new Path(tempDirForArchives.getAbsolutePath()), Nil)
+ }
+ }
+
test("distribute local spark jars") {
val temp = Utils.createTempDir()
val jarsDir = new File(temp, "jars")