path: root/core
author     Tom Magrino <tmagrino@fb.com>            2016-06-28 13:36:41 -0700
committer  Shixiong Zhu <shixiong@databricks.com>   2016-06-28 13:36:41 -0700
commit     ae14f362355b131fcb3e3633da7bb14bdd2b6893 (patch)
tree       877fc906af08c4e0e807948323d3878b242debca /core
parent     d59ba8e30751bbf91d49f5530b8242a12bbfb569 (diff)
[SPARK-16148][SCHEDULER] Allow for underscores in TaskLocation in the Executor ID
## What changes were proposed in this pull request?

Previously, the TaskLocation implementation would not allow executor IDs that include underscores. This tweaks the string split used to get the hostname and executor ID, allowing underscores in the executor ID.

This addresses the JIRA found here: https://issues.apache.org/jira/browse/SPARK-16148

This is moved over from a previous PR against branch-1.6: https://github.com/apache/spark/pull/13857

## How was this patch tested?

Ran the existing unit tests for core and streaming. Manually ran a simple streaming job with an executor whose ID contained underscores and confirmed that the job ran successfully.

This is my original work and I license the work to the project under the project's open source license.

Author: Tom Magrino <tmagrino@fb.com>

Closes #13858 from tmagrino/fixtasklocation.
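For illustration, a minimal, self-contained sketch of the parsing strategy described above: strip the `executor_` tag, then split on the first remaining underscore only, so the executor ID itself may contain underscores. The object and method names below are hypothetical; in Spark the real logic lives in `TaskLocation.apply`.

```scala
// Illustrative sketch only; not Spark's actual API.
object ExecutorLocationParsingSketch {
  private val executorLocationTag = "executor_"

  // Strip the "executor_" tag, then split at the first underscore only,
  // so everything after the hostname is kept as the executor ID.
  def parse(str: String): (String, String) = {
    require(str.startsWith(executorLocationTag), "Illegal executor location format: " + str)
    val hostAndExecutorId = str.stripPrefix(executorLocationTag)
    val splits = hostAndExecutorId.split("_", 2)
    require(splits.length == 2, "Illegal executor location format: " + str)
    (splits(0), splits(1))
  }

  def main(args: Array[String]): Unit = {
    println(parse("executor_host1_3"))                    // (host1,3)
    println(parse("executor_some.host1_executor_task_3")) // (some.host1,executor_task_3)
  }
}
```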
Diffstat (limited to 'core')
-rw-r--r--  core/src/main/scala/org/apache/spark/scheduler/TaskLocation.scala        | 14
-rw-r--r--  core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala |  2
2 files changed, 9 insertions, 7 deletions
diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskLocation.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskLocation.scala
index 1eb6c1614f..06b52935c6 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/TaskLocation.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/TaskLocation.scala
@@ -64,18 +64,18 @@ private[spark] object TaskLocation {
 
   /**
    * Create a TaskLocation from a string returned by getPreferredLocations.
-   * These strings have the form [hostname] or hdfs_cache_[hostname], depending on whether the
-   * location is cached.
+   * These strings have the form executor_[hostname]_[executorid], [hostname], or
+   * hdfs_cache_[hostname], depending on whether the location is cached.
    */
   def apply(str: String): TaskLocation = {
     val hstr = str.stripPrefix(inMemoryLocationTag)
     if (hstr.equals(str)) {
       if (str.startsWith(executorLocationTag)) {
-        val splits = str.split("_")
-        if (splits.length != 3) {
-          throw new IllegalArgumentException("Illegal executor location format: " + str)
-        }
-        new ExecutorCacheTaskLocation(splits(1), splits(2))
+        val hostAndExecutorId = str.stripPrefix(executorLocationTag)
+        val splits = hostAndExecutorId.split("_", 2)
+        require(splits.length == 2, "Illegal executor location format: " + str)
+        val Array(host, executorId) = splits
+        new ExecutorCacheTaskLocation(host, executorId)
       } else {
         new HostTaskLocation(str)
       }
diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala
index cfbabd8fb5..36d1c5690f 100644
--- a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala
@@ -802,6 +802,8 @@ class TaskSetManagerSuite extends SparkFunSuite with LocalSparkContext with Logg
     assert(TaskLocation("host1") === HostTaskLocation("host1"))
     assert(TaskLocation("hdfs_cache_host1") === HDFSCacheTaskLocation("host1"))
     assert(TaskLocation("executor_host1_3") === ExecutorCacheTaskLocation("host1", "3"))
+    assert(TaskLocation("executor_some.host1_executor_task_3") ===
+      ExecutorCacheTaskLocation("some.host1", "executor_task_3"))
   }
 
   test("Kill other task attempts when one attempt belonging to the same task succeeds") {