From b7b5e17876f65c6644505c356f1a0db24ce1d142 Mon Sep 17 00:00:00 2001 From: Marcelo Vanzin Date: Thu, 14 Jul 2016 09:42:32 -0500 Subject: [SPARK-16505][YARN] Optionally propagate error during shuffle service startup. This prevents the NM from starting when something is wrong, which would otherwise lead to later errors that are confusing and harder to debug. Added a unit test to verify startup fails if something is wrong. Author: Marcelo Vanzin Closes #14162 from vanzin/SPARK-16505. --- .../network/yarn/YarnShuffleServiceSuite.scala | 34 ++++++++++++++++++++-- 1 file changed, 31 insertions(+), 3 deletions(-) (limited to 'yarn/src') diff --git a/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceSuite.scala b/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceSuite.scala index 5458fb9d2e..e123e78541 100644 --- a/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceSuite.scala +++ b/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceSuite.scala @@ -16,13 +16,17 @@ */ package org.apache.spark.network.yarn -import java.io.{DataOutputStream, File, FileOutputStream} +import java.io.{DataOutputStream, File, FileOutputStream, IOException} +import java.nio.file.Files +import java.nio.file.attribute.PosixFilePermission._ +import java.util.EnumSet import scala.annotation.tailrec import scala.concurrent.duration._ import scala.language.postfixOps import org.apache.hadoop.fs.Path +import org.apache.hadoop.service.ServiceStateException import org.apache.hadoop.yarn.api.records.ApplicationId import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.hadoop.yarn.server.api.{ApplicationInitializationContext, ApplicationTerminationContext} @@ -45,7 +49,7 @@ class YarnShuffleServiceSuite extends SparkFunSuite with Matchers with BeforeAnd classOf[YarnShuffleService].getCanonicalName) yarnConfig.setInt("spark.shuffle.service.port", 0) val localDir = Utils.createTempDir() - yarnConfig.set("yarn.nodemanager.local-dirs", 
localDir.getAbsolutePath) + yarnConfig.set(YarnConfiguration.NM_LOCAL_DIRS, localDir.getAbsolutePath) } var s1: YarnShuffleService = null @@ -316,4 +320,28 @@ class YarnShuffleServiceSuite extends SparkFunSuite with Matchers with BeforeAnd s2.stop() } - } + + test("service throws error if cannot start") { + // Create a different config with a read-only local dir. + val roConfig = new YarnConfiguration(yarnConfig) + val roDir = Utils.createTempDir() + Files.setPosixFilePermissions(roDir.toPath(), EnumSet.of(OWNER_READ, OWNER_EXECUTE)) + roConfig.set(YarnConfiguration.NM_LOCAL_DIRS, roDir.getAbsolutePath()) + roConfig.setBoolean(YarnShuffleService.STOP_ON_FAILURE_KEY, true) + + // Try to start the shuffle service, it should fail. + val service = new YarnShuffleService() + + try { + val error = intercept[ServiceStateException] { + service.init(roConfig) + } + assert(error.getCause().isInstanceOf[IOException]) + } finally { + service.stop() + Files.setPosixFilePermissions(roDir.toPath(), + EnumSet.of(OWNER_READ, OWNER_WRITE, OWNER_EXECUTE)) + } + } + +} -- cgit v1.2.3