about | summary | refs | log | tree | commit | diff
path: root/yarn
diff options
context:
space:
mode:
author: Marcelo Vanzin <vanzin@cloudera.com> 2016-05-24 10:26:55 -0700
committer: Marcelo Vanzin <vanzin@cloudera.com> 2016-05-24 10:26:55 -0700
commit: a313a5ae74ae4e7686283657ba56076222317595 (patch)
tree: e56d6366c1f5c63b68de63a3f70c092232f630dd /yarn
parent: 695d9a0fd461070ee2684b2210fb69d0b6ed1a95 (diff)
download: spark-a313a5ae74ae4e7686283657ba56076222317595.tar.gz
spark-a313a5ae74ae4e7686283657ba56076222317595.tar.bz2
spark-a313a5ae74ae4e7686283657ba56076222317595.zip
[SPARK-15405][YARN] Remove unnecessary upload of config archive.
We only need one copy of it. The client code that was uploading the second copy just needs to be modified to update the metadata in the cache, so that the AM knows where to find the configuration.

Tested by running an app on YARN and verifying in the logs that only one archive is uploaded.

Author: Marcelo Vanzin <vanzin@cloudera.com>

Closes #13232 from vanzin/SPARK-15405.
Diffstat (limited to 'yarn')
-rw-r--r-- yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala | 9
-rw-r--r-- yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala | 12
2 files changed, 13 insertions, 8 deletions
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index 3f6d7b28b5..a12391d081 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -591,10 +591,11 @@ private[spark] class Client(
copyFileToRemote(destDir, localConfArchive, replication, force = true,
destName = Some(LOCALIZED_CONF_ARCHIVE))
- val (_, confLocalizedPath) = distribute(createConfArchive().toURI().getPath(),
- resType = LocalResourceType.ARCHIVE,
- destName = Some(LOCALIZED_CONF_DIR))
- require(confLocalizedPath != null)
+ // Manually add the config archive to the cache manager so that the AM is launched with
+ // the proper files set up.
+ distCacheMgr.addResource(
+ remoteFs, hadoopConf, remoteConfArchivePath, localResources, LocalResourceType.ARCHIVE,
+ LOCALIZED_CONF_DIR, statCache, appMasterOnly = false)
// Clear the cache-related entries from the configuration to avoid them polluting the
// UI's environment page. This works for client mode; for cluster mode, this is handled
diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala
index a408c48d1d..0a4f291e25 100644
--- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala
+++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala
@@ -144,9 +144,16 @@ class ClientSuite extends SparkFunSuite with Matchers with BeforeAndAfterAll
.set(SPARK_JARS, Seq(SPARK))
.set("spark.yarn.dist.jars", ADDED)
val client = createClient(sparkConf, args = Array("--jar", USER))
+ doReturn(new Path("/")).when(client).copyFileToRemote(any(classOf[Path]),
+ any(classOf[Path]), anyShort(), anyBoolean(), any())
val tempDir = Utils.createTempDir()
try {
+ // Because we mocked "copyFileToRemote" above to avoid having to create fake local files,
+ // we need to create a fake config archive in the temp dir to avoid having
+ // prepareLocalResources throw an exception.
+ new FileOutputStream(new File(tempDir, LOCALIZED_CONF_ARCHIVE)).close()
+
client.prepareLocalResources(new Path(tempDir.getAbsolutePath()), Nil)
sparkConf.get(APP_JAR) should be (Some(USER))
@@ -384,10 +391,7 @@ class ClientSuite extends SparkFunSuite with Matchers with BeforeAndAfterAll
conf: Configuration = new Configuration(),
args: Array[String] = Array()): Client = {
val clientArgs = new ClientArguments(args)
- val client = spy(new Client(clientArgs, conf, sparkConf))
- doReturn(new Path("/")).when(client).copyFileToRemote(any(classOf[Path]),
- any(classOf[Path]), anyShort(), anyBoolean(), any())
- client
+ spy(new Client(clientArgs, conf, sparkConf))
}
private def classpath(client: Client): Array[String] = {