aboutsummaryrefslogtreecommitdiff
path: root/core/src/main/scala/org
diff options
context:
space:
mode:
authorShixiong Zhu <shixiong@databricks.com>2016-11-30 16:18:53 -0800
committerShixiong Zhu <shixiong@databricks.com>2016-11-30 16:18:53 -0800
commitc4979f6ea8ed44fd87ded3133efa6df39d4842c3 (patch)
tree481132adf681b5295e076056da5e4fcd29b4295c /core/src/main/scala/org
parent93e9d880bf8a144112d74a6897af4e36fcfa5807 (diff)
downloadspark-c4979f6ea8ed44fd87ded3133efa6df39d4842c3.tar.gz
spark-c4979f6ea8ed44fd87ded3133efa6df39d4842c3.tar.bz2
spark-c4979f6ea8ed44fd87ded3133efa6df39d4842c3.zip
[SPARK-18655][SS] Ignore Structured Streaming 2.0.2 logs in history server
## What changes were proposed in this pull request? As `queryStatus` in StreamingQueryListener events was removed in #15954, parsing 2.0.2 structured streaming logs will throw the following errror: ``` [info] com.fasterxml.jackson.databind.exc.UnrecognizedPropertyException: Unrecognized field "queryStatus" (class org.apache.spark.sql.streaming.StreamingQueryListener$QueryTerminatedEvent), not marked as ignorable (2 known properties: "id", "exception"]) [info] at [Source: {"Event":"org.apache.spark.sql.streaming.StreamingQueryListener$QueryTerminatedEvent","queryStatus":{"name":"query-1","id":1,"timestamp":1480491532753,"inputRate":0.0,"processingRate":0.0,"latency":null,"sourceStatuses":[{"description":"FileStreamSource[file:/Users/zsx/stream]","offsetDesc":"#0","inputRate":0.0,"processingRate":0.0,"triggerDetails":{"latency.getOffset.source":"1","triggerId":"1"}}],"sinkStatus":{"description":"FileSink[/Users/zsx/stream2]","offsetDesc":"[#0]"},"triggerDetails":{}},"exception":null}; line: 1, column: 521] (through reference chain: org.apache.spark.sql.streaming.QueryTerminatedEvent["queryStatus"]) [info] at com.fasterxml.jackson.databind.exc.UnrecognizedPropertyException.from(UnrecognizedPropertyException.java:51) [info] at com.fasterxml.jackson.databind.DeserializationContext.reportUnknownProperty(DeserializationContext.java:839) [info] at com.fasterxml.jackson.databind.deser.std.StdDeserializer.handleUnknownProperty(StdDeserializer.java:1045) [info] at com.fasterxml.jackson.databind.deser.BeanDeserializerBase.handleUnknownProperty(BeanDeserializerBase.java:1352) [info] at com.fasterxml.jackson.databind.deser.BeanDeserializerBase.handleUnknownProperties(BeanDeserializerBase.java:1306) [info] at com.fasterxml.jackson.databind.deser.BeanDeserializer._deserializeUsingPropertyBased(BeanDeserializer.java:453) [info] at com.fasterxml.jackson.databind.deser.BeanDeserializerBase.deserializeFromObjectUsingNonDefault(BeanDeserializerBase.java:1099) ... ``` This PR just ignores such errors and adds a test to make sure we can read 2.0.2 logs. ## How was this patch tested? `query-event-logs-version-2.0.2.txt` has all types of events generated by Structured Streaming in Spark 2.0.2. `testQuietly("ReplayListenerBus should ignore broken event jsons generated in 2.0.2")` verified we can load them without any error. Author: Shixiong Zhu <shixiong@databricks.com> Closes #16085 from zsxwing/SPARK-18655.
Diffstat (limited to 'core/src/main/scala/org')
-rw-r--r--core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala7
1 files changed, 7 insertions, 0 deletions
diff --git a/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala b/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala
index 0bd5a6bc59..08e05ae0c0 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala
@@ -22,6 +22,7 @@ import java.io.{InputStream, IOException}
import scala.io.Source
import com.fasterxml.jackson.core.JsonParseException
+import com.fasterxml.jackson.databind.exc.UnrecognizedPropertyException
import org.json4s.jackson.JsonMethods._
import org.apache.spark.internal.Logging
@@ -87,6 +88,12 @@ private[spark] class ReplayListenerBus extends SparkListenerBus with Logging {
// Ignore events generated by Structured Streaming in Spark 2.0.0 and 2.0.1.
// It's safe since no place uses them.
logWarning(s"Dropped incompatible Structured Streaming log: $currentLine")
+ case e: UnrecognizedPropertyException if e.getMessage != null && e.getMessage.startsWith(
+ "Unrecognized field \"queryStatus\" " +
+ "(class org.apache.spark.sql.streaming.StreamingQueryListener$") =>
+ // Ignore events generated by Structured Streaming in Spark 2.0.2
+ // It's safe since no place uses them.
+ logWarning(s"Dropped incompatible Structured Streaming log: $currentLine")
case jpe: JsonParseException =>
// We can only ignore exception from last line of the file that might be truncated
// the last entry may not be the very last line in the event log, but we treat it