From a313a5ae74ae4e7686283657ba56076222317595 Mon Sep 17 00:00:00 2001 From: Marcelo Vanzin Date: Tue, 24 May 2016 10:26:55 -0700 Subject: [SPARK-15405][YARN] Remove unnecessary upload of config archive. We only need one copy of it. The client code that was uploading the second copy just needs to be modified to update the metadata in the cache, so that the AM knows where to find the configuration. Tested by running app on YARN and verifying in the logs only one archive is uploaded. Author: Marcelo Vanzin Closes #13232 from vanzin/SPARK-15405. --- .../src/main/scala/org/apache/spark/deploy/yarn/Client.scala | 9 +++++---- .../scala/org/apache/spark/deploy/yarn/ClientSuite.scala | 12 ++++++++---- 2 files changed, 13 insertions(+), 8 deletions(-) (limited to 'yarn/src') diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 3f6d7b28b5..a12391d081 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -591,10 +591,11 @@ private[spark] class Client( copyFileToRemote(destDir, localConfArchive, replication, force = true, destName = Some(LOCALIZED_CONF_ARCHIVE)) - val (_, confLocalizedPath) = distribute(createConfArchive().toURI().getPath(), - resType = LocalResourceType.ARCHIVE, - destName = Some(LOCALIZED_CONF_DIR)) - require(confLocalizedPath != null) + // Manually add the config archive to the cache manager so that the AM is launched with + // the proper files set up. + distCacheMgr.addResource( + remoteFs, hadoopConf, remoteConfArchivePath, localResources, LocalResourceType.ARCHIVE, + LOCALIZED_CONF_DIR, statCache, appMasterOnly = false) // Clear the cache-related entries from the configuration to avoid them polluting the // UI's environment page. This works for client mode; for cluster mode, this is handled diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala index a408c48d1d..0a4f291e25 100644 --- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala +++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala @@ -144,9 +144,16 @@ class ClientSuite extends SparkFunSuite with Matchers with BeforeAndAfterAll .set(SPARK_JARS, Seq(SPARK)) .set("spark.yarn.dist.jars", ADDED) val client = createClient(sparkConf, args = Array("--jar", USER)) + doReturn(new Path("/")).when(client).copyFileToRemote(any(classOf[Path]), + any(classOf[Path]), anyShort(), anyBoolean(), any()) val tempDir = Utils.createTempDir() try { + // Because we mocked "copyFileToRemote" above to avoid having to create fake local files, + // we need to create a fake config archive in the temp dir to avoid having + // prepareLocalResources throw an exception. + new FileOutputStream(new File(tempDir, LOCALIZED_CONF_ARCHIVE)).close() + client.prepareLocalResources(new Path(tempDir.getAbsolutePath()), Nil) sparkConf.get(APP_JAR) should be (Some(USER)) @@ -384,10 +391,7 @@ class ClientSuite extends SparkFunSuite with Matchers with BeforeAndAfterAll conf: Configuration = new Configuration(), args: Array[String] = Array()): Client = { val clientArgs = new ClientArguments(args) - val client = spy(new Client(clientArgs, conf, sparkConf)) - doReturn(new Path("/")).when(client).copyFileToRemote(any(classOf[Path]), - any(classOf[Path]), anyShort(), anyBoolean(), any()) - client + spy(new Client(clientArgs, conf, sparkConf)) } private def classpath(client: Client): Array[String] = { -- cgit v1.2.3