aboutsummaryrefslogtreecommitdiff
path: root/sql/core/src/main
diff options
context:
space:
mode:
authorShixiong Zhu <shixiong@databricks.com>2017-03-03 17:10:11 -0800
committerShixiong Zhu <shixiong@databricks.com>2017-03-03 17:10:11 -0800
commita6a7a95e2f3482d84fcd744713e43f80ea90e33a (patch)
tree62bd8133ef9161071793b87a9cd3d3ea58b917e9 /sql/core/src/main
parentf5fdbe04369d2c04972b7d9686d0c6c046f3d3dc (diff)
downloadspark-a6a7a95e2f3482d84fcd744713e43f80ea90e33a.tar.gz
spark-a6a7a95e2f3482d84fcd744713e43f80ea90e33a.tar.bz2
spark-a6a7a95e2f3482d84fcd744713e43f80ea90e33a.zip
[SPARK-19718][SS] Handle more interrupt cases properly for Hadoop
## What changes were proposed in this pull request? [SPARK-19617](https://issues.apache.org/jira/browse/SPARK-19617) changed `HDFSMetadataLog` to enable interrupts when using the local file system. However, now we hit [HADOOP-12074](https://issues.apache.org/jira/browse/HADOOP-12074): `Shell.runCommand` converts `InterruptedException` to `new IOException(ie.toString())` before Hadoop 2.8. This is the Hadoop patch to fix HADOOP-12074: https://github.com/apache/hadoop/commit/95c73d49b1bb459b626a9ac52acadb8f5fa724de This PR adds new logic to handle the following cases related to `InterruptedException`. - Check if the message of IOException starts with `java.lang.InterruptedException`. If so, treat it as `InterruptedException`. This is for pre-Hadoop 2.8. - Treat `InterruptedIOException` as `InterruptedException`. This is for Hadoop 2.8+ and other places that may throw `InterruptedIOException` when the thread is interrupted. ## How was this patch tested? The new unit test. Author: Shixiong Zhu <shixiong@databricks.com> Closes #17044 from zsxwing/SPARK-19718.
Diffstat (limited to 'sql/core/src/main')
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala20
1 file changed, 15 insertions, 5 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala
index 6e77f354b5..70912d13ae 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala
@@ -17,6 +17,7 @@
package org.apache.spark.sql.execution.streaming
+import java.io.{InterruptedIOException, IOException}
import java.util.UUID
import java.util.concurrent.{CountDownLatch, TimeUnit}
import java.util.concurrent.atomic.AtomicReference
@@ -37,6 +38,12 @@ import org.apache.spark.sql.execution.command.StreamingExplainCommand
import org.apache.spark.sql.streaming._
import org.apache.spark.util.{Clock, UninterruptibleThread, Utils}
+/** States for [[StreamExecution]]'s lifecycle. */
+trait State
+case object INITIALIZING extends State
+case object ACTIVE extends State
+case object TERMINATED extends State
+
/**
* Manages the execution of a streaming Spark SQL query that is occurring in a separate thread.
* Unlike a standard query, a streaming query executes repeatedly each time new data arrives at any
@@ -298,7 +305,14 @@ class StreamExecution(
// `stop()` is already called. Let `finally` finish the cleanup.
}
} catch {
- case _: InterruptedException if state.get == TERMINATED => // interrupted by stop()
+ case _: InterruptedException | _: InterruptedIOException if state.get == TERMINATED =>
+ // interrupted by stop()
+ updateStatusMessage("Stopped")
+ case e: IOException if e.getMessage != null
+ && e.getMessage.startsWith(classOf[InterruptedException].getName)
+ && state.get == TERMINATED =>
+ // This is a workaround for HADOOP-12074: `Shell.runCommand` converts `InterruptedException`
+ // to `new IOException(ie.toString())` before Hadoop 2.8.
updateStatusMessage("Stopped")
case e: Throwable =>
streamDeathCause = new StreamingQueryException(
@@ -721,10 +735,6 @@ class StreamExecution(
}
}
- trait State
- case object INITIALIZING extends State
- case object ACTIVE extends State
- case object TERMINATED extends State
}