author    Sandy Ryza <sandy@cloudera.com>    2015-01-30 11:31:54 -0600
committer Thomas Graves <tgraves@apache.org> 2015-01-30 11:31:54 -0600
commit    254eaa4d350dafe19f1715e80eb816856a126c21 (patch)
tree      812b82129636867995d8bbf197495fb9581a5967 /yarn
parent    6f21dce5f4619e1a5d07028e2a74dc36be0849b9 (diff)
SPARK-5393. Flood of util.RackResolver log messages after SPARK-1714
Previously I had tried to solve this by adding a line to Spark's log4j-defaults.properties. The issue with the line in log4j-defaults.properties was that the log4j.properties packaged inside Hadoop was getting picked up instead. While it would be ideal to fix that as well, we still want to quiet this in situations where a user supplies their own custom log4j properties.

Author: Sandy Ryza <sandy@cloudera.com>

Closes #4192 from sryza/sandy-spark-5393 and squashes the following commits:

4d5dedc [Sandy Ryza] Only set log level if unset
46e07c5 [Sandy Ryza] SPARK-5393. Flood of util.RackResolver log messages after SPARK-1714
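The abandoned approach was a one-line logger override in log4j-defaults.properties (something like log4j.logger.org.apache.hadoop.yarn.util.RackResolver=WARN; illustrative, not necessarily the exact line from the earlier attempt), which a log4j.properties bundled in Hadoop's jars can shadow entirely. The patch instead sets the level programmatically, but only when no level has been configured for that logger, so an explicit user setting always wins. A minimal standalone sketch of the guard, matching the code added in the hunks below:

    import org.apache.hadoop.yarn.util.RackResolver
    import org.apache.log4j.{Level, Logger}

    // getLevel returns null when no level was set explicitly on this logger
    // (it would merely inherit one). Only in that case do we quiet it to WARN,
    // so a user-supplied log4j configuration for RackResolver is respected.
    if (Logger.getLogger(classOf[RackResolver]).getLevel == null) {
      Logger.getLogger(classOf[RackResolver]).setLevel(Level.WARN)
    }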
Diffstat (limited to 'yarn')
-rw-r--r-- yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala             |  7
-rw-r--r-- yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala       |  4
-rw-r--r-- yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterScheduler.scala | 18
-rw-r--r-- yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnScheduler.scala (renamed from yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientClusterScheduler.scala) | 12
4 files changed, 16 insertions(+), 25 deletions(-)
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala
index d00f29665a..3849586c61 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala
@@ -32,6 +32,8 @@ import org.apache.hadoop.yarn.client.api.AMRMClient
import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest
import org.apache.hadoop.yarn.util.RackResolver
+import org.apache.log4j.{Level, Logger}
+
import org.apache.spark.{Logging, SecurityManager, SparkConf}
import org.apache.spark.deploy.yarn.YarnSparkHadoopUtil._
import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend
@@ -60,6 +62,11 @@ private[yarn] class YarnAllocator(
import YarnAllocator._
+ // RackResolver logs an INFO message whenever it resolves a rack, which is way too often.
+ if (Logger.getLogger(classOf[RackResolver]).getLevel == null) {
+ Logger.getLogger(classOf[RackResolver]).setLevel(Level.WARN)
+ }
+
// Visible for testing.
val allocatedHostToContainersMap =
new HashMap[String, collection.mutable.Set[ContainerId]]
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala
index 4bff846123..4e39c1d580 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala
@@ -17,12 +17,9 @@
package org.apache.spark.deploy.yarn
-import java.lang.{Boolean => JBoolean}
import java.io.File
-import java.util.{Collections, Set => JSet}
import java.util.regex.Matcher
import java.util.regex.Pattern
-import java.util.concurrent.ConcurrentHashMap
import scala.collection.mutable.HashMap
@@ -32,7 +29,6 @@ import org.apache.hadoop.security.Credentials
import org.apache.hadoop.security.UserGroupInformation
import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.apache.hadoop.yarn.api.records.{Priority, ApplicationAccessType}
-import org.apache.hadoop.yarn.util.RackResolver
import org.apache.hadoop.conf.Configuration
import org.apache.spark.{SecurityManager, SparkConf}
diff --git a/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterScheduler.scala b/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterScheduler.scala
index be55d26f1c..72ec4d6b34 100644
--- a/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterScheduler.scala
+++ b/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterScheduler.scala
@@ -17,33 +17,17 @@
package org.apache.spark.scheduler.cluster
-import org.apache.hadoop.yarn.util.RackResolver
-
import org.apache.spark._
import org.apache.spark.deploy.yarn.ApplicationMaster
-import org.apache.spark.scheduler.TaskSchedulerImpl
-import org.apache.spark.util.Utils
/**
* This is a simple extension to ClusterScheduler - to ensure that appropriate initialization of
* ApplicationMaster, etc is done
*/
-private[spark] class YarnClusterScheduler(sc: SparkContext) extends TaskSchedulerImpl(sc) {
+private[spark] class YarnClusterScheduler(sc: SparkContext) extends YarnScheduler(sc) {
logInfo("Created YarnClusterScheduler")
- // Nothing else for now ... initialize application master : which needs a SparkContext to
- // determine how to allocate.
- // Note that only the first creation of a SparkContext influences (and ideally, there must be
- // only one SparkContext, right ?). Subsequent creations are ignored since executors are already
- // allocated by then.
-
- // By default, rack is unknown
- override def getRackForHost(hostPort: String): Option[String] = {
- val host = Utils.parseHostPort(hostPort)._1
- Option(RackResolver.resolve(sc.hadoopConfiguration, host).getNetworkLocation)
- }
-
override def postStartHook() {
ApplicationMaster.sparkContextInitialized(sc)
super.postStartHook()
diff --git a/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientClusterScheduler.scala b/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnScheduler.scala
index 2fa24cc433..4ebf3af12b 100644
--- a/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientClusterScheduler.scala
+++ b/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnScheduler.scala
@@ -19,14 +19,18 @@ package org.apache.spark.scheduler.cluster
import org.apache.hadoop.yarn.util.RackResolver
+import org.apache.log4j.{Level, Logger}
+
import org.apache.spark._
import org.apache.spark.scheduler.TaskSchedulerImpl
import org.apache.spark.util.Utils
-/**
- * This scheduler launches executors through Yarn - by calling into Client to launch the Spark AM.
- */
-private[spark] class YarnClientClusterScheduler(sc: SparkContext) extends TaskSchedulerImpl(sc) {
+private[spark] class YarnScheduler(sc: SparkContext) extends TaskSchedulerImpl(sc) {
+
+ // RackResolver logs an INFO message whenever it resolves a rack, which is way too often.
+ if (Logger.getLogger(classOf[RackResolver]).getLevel == null) {
+ Logger.getLogger(classOf[RackResolver]).setLevel(Level.WARN)
+ }
// By default, rack is unknown
override def getRackForHost(hostPort: String): Option[String] = {
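Piecing the hunks together, the refactor leaves the two scheduler classes in roughly the following shape. This is a condensed sketch, assuming getRackForHost carried over unchanged from the removed YarnClusterScheduler code into the renamed YarnScheduler; anything beyond what the diff shows is abbreviated:

    import org.apache.hadoop.yarn.util.RackResolver
    import org.apache.log4j.{Level, Logger}

    import org.apache.spark._
    import org.apache.spark.deploy.yarn.ApplicationMaster
    import org.apache.spark.scheduler.TaskSchedulerImpl
    import org.apache.spark.util.Utils

    // Shared base class for client and cluster mode: owns rack resolution.
    private[spark] class YarnScheduler(sc: SparkContext) extends TaskSchedulerImpl(sc) {

      // RackResolver logs an INFO message whenever it resolves a rack, which is way too often.
      if (Logger.getLogger(classOf[RackResolver]).getLevel == null) {
        Logger.getLogger(classOf[RackResolver]).setLevel(Level.WARN)
      }

      // By default, rack is unknown; otherwise delegate to Hadoop's RackResolver.
      override def getRackForHost(hostPort: String): Option[String] = {
        val host = Utils.parseHostPort(hostPort)._1
        Option(RackResolver.resolve(sc.hadoopConfiguration, host).getNetworkLocation)
      }
    }

    // Cluster mode adds only the ApplicationMaster handshake on startup.
    private[spark] class YarnClusterScheduler(sc: SparkContext) extends YarnScheduler(sc) {

      logInfo("Created YarnClusterScheduler")

      override def postStartHook() {
        ApplicationMaster.sparkContextInitialized(sc)
        super.postStartHook()
      }
    }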