-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/OutputMode.scala | 23
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala | 145
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala | 18
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala | 3
-rw-r--r--  sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala | 384
-rw-r--r--  sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/LogicalPlanSuite.scala | 21
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/ContinuousQueryManager.scala | 16
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala | 4
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala | 9
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala | 8
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala | 11
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala | 2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 8
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/StreamTest.scala | 8
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala | 34
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala | 3
16 files changed, 685 insertions, 12 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/OutputMode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/OutputMode.scala
new file mode 100644
index 0000000000..a4d387eae3
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/OutputMode.scala
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.analysis
+
+sealed trait OutputMode
+
+case object Append extends OutputMode
+case object Update extends OutputMode
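
Note: OutputMode is a small sealed ADT with exactly two values, so downstream code can pattern-match on it exhaustively. A minimal sketch of such a consumer, assuming the classes added above are on the classpath; describeMode is a hypothetical helper, not part of this patch, and the wording of each description follows the restrictions enforced by the checker below:

    import org.apache.spark.sql.catalyst.analysis.{Append, OutputMode, Update}

    // Hypothetical helper: the sealed trait lets the compiler verify the match is exhaustive.
    def describeMode(mode: OutputMode): String = mode match {
      case Append => "only append new result rows; streaming aggregations are rejected"
      case Update => "allow previously emitted results to be updated, e.g. running aggregates"
    }
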
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala
new file mode 100644
index 0000000000..aadc1d31bd
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.analysis
+
+import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.catalyst.plans._
+import org.apache.spark.sql.catalyst.plans.logical._
+
+/**
+ * Analyzes the presence of unsupported operations in a logical plan.
+ */
+object UnsupportedOperationChecker {
+
+ def checkForBatch(plan: LogicalPlan): Unit = {
+ plan.foreachUp {
+ case p if p.isStreaming =>
+ throwError(
+ "Queries with streaming sources must be executed with write.startStream()")(p)
+
+ case _ =>
+ }
+ }
+
+ def checkForStreaming(plan: LogicalPlan, outputMode: OutputMode): Unit = {
+
+ if (!plan.isStreaming) {
+ throwError(
+ "Queries without streaming sources cannot be executed with write.startStream()")(plan)
+ }
+
+ plan.foreachUp { implicit plan =>
+
+ // Operations that cannot exist anywhere in a streaming plan
+ plan match {
+
+ case _: Command =>
+ throwError("Commands like CreateTable*, AlterTable*, Show* are not supported with " +
+ "streaming DataFrames/Datasets")
+
+ case _: InsertIntoTable =>
+ throwError("InsertIntoTable is not supported with streaming DataFrames/Datasets")
+
+ case Aggregate(_, _, child) if child.isStreaming && outputMode == Append =>
+ throwError(
+ "Aggregations are not supported on streaming DataFrames/Datasets in " +
+ "Append output mode. Consider changing output mode to Update.")
+
+ case Join(left, right, joinType, _) =>
+
+ joinType match {
+
+ case Inner =>
+ if (left.isStreaming && right.isStreaming) {
+ throwError("Inner join between two streaming DataFrames/Datasets is not supported")
+ }
+
+ case FullOuter =>
+ if (left.isStreaming || right.isStreaming) {
+ throwError("Full outer joins with streaming DataFrames/Datasets are not supported")
+ }
+
+
+ case LeftOuter | LeftSemi | LeftAnti =>
+ if (right.isStreaming) {
+ throwError("Left outer/semi/anti joins with a streaming DataFrame/Dataset " +
+ "on the right is not supported")
+ }
+
+ case RightOuter =>
+ if (left.isStreaming) {
+ throwError("Right outer join with a streaming DataFrame/Dataset on the left is " +
+ "not supported")
+ }
+
+ case NaturalJoin(_) | UsingJoin(_, _) =>
+ // They should not appear in an analyzed plan.
+
+ case _ =>
+ throwError(s"Join type $joinType is not supported with streaming DataFrame/Dataset")
+ }
+
+ case c: CoGroup if c.children.exists(_.isStreaming) =>
+ throwError("CoGrouping with a streaming DataFrame/Dataset is not supported")
+
+ case u: Union if u.children.map(_.isStreaming).distinct.size == 2 =>
+ throwError("Union between streaming and batch DataFrames/Datasets is not supported")
+
+ case Except(left, right) if right.isStreaming =>
+ throwError("Except with a streaming DataFrame/Dataset on the right is not supported")
+
+ case Intersect(left, right) if left.isStreaming && right.isStreaming =>
+ throwError("Intersect between two streaming DataFrames/Datasets is not supported")
+
+ case GroupingSets(_, _, child, _) if child.isStreaming =>
+ throwError("GroupingSets is not supported on streaming DataFrames/Datasets")
+
+ case GlobalLimit(_, _) | LocalLimit(_, _) if plan.children.forall(_.isStreaming) =>
+ throwError("Limits are not supported on streaming DataFrames/Datasets")
+
+ case Sort(_, _, _) | SortPartitions(_, _) if plan.children.forall(_.isStreaming) =>
+ throwError("Sorting is not supported on streaming DataFrames/Datasets")
+
+ case Sample(_, _, _, _, child) if child.isStreaming =>
+ throwError("Sampling is not supported on streaming DataFrames/Datasets")
+
+ case Window(_, _, _, child) if child.isStreaming =>
+ throwError("Non-time-based windows are not supported on streaming DataFrames/Datasets")
+
+ case ReturnAnswer(child) if child.isStreaming =>
+ throwError("Cannot return immediate result on streaming DataFrames/Dataset. Queries " +
+ "with streaming DataFrames/Datasets must be executed with write.startStream().")
+
+ case _ =>
+ }
+ }
+ }
+
+ private def throwErrorIf(
+ condition: Boolean,
+ msg: String)(implicit operator: LogicalPlan): Unit = {
+ if (condition) {
+ throwError(msg)
+ }
+ }
+
+ private def throwError(msg: String)(implicit operator: LogicalPlan): Nothing = {
+ throw new AnalysisException(
+ msg, operator.origin.line, operator.origin.startPosition, Some(operator))
+ }
+}
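
Note: the checker is invoked on an analyzed logical plan together with the intended output mode. A minimal sketch of calling it directly, mirroring the new UnsupportedOperationsSuite further down in this patch; TestStreamingRelation is a hypothetical stand-in for a real streaming source node, and relies on the isStreaming hook added to LogicalPlan below:

    import org.apache.spark.sql.catalyst.analysis.{Append, Update, UnsupportedOperationChecker}
    import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
    import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LocalRelation}
    import org.apache.spark.sql.types.IntegerType

    // Leaf node that marks itself as streaming, like StreamingRelation does in sql/core.
    case class TestStreamingRelation(output: Seq[Attribute]) extends LeafNode {
      override def isStreaming: Boolean = true
    }

    val attr       = AttributeReference("a", IntegerType, nullable = true)()
    val batchPlan  = LocalRelation(attr)
    val streamPlan = TestStreamingRelation(Seq(attr))

    // Passes: a plain streaming relation is fine in either output mode.
    UnsupportedOperationChecker.checkForStreaming(streamPlan, Update)
    // Would throw AnalysisException: a purely batch plan must not use write.startStream().
    // UnsupportedOperationChecker.checkForStreaming(batchPlan, Append)
    // Would throw AnalysisException: a streaming plan cannot be executed as a batch query.
    // UnsupportedOperationChecker.checkForBatch(streamPlan)
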
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
index 1e7296664b..958966328b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
@@ -297,6 +297,24 @@ package object dsl {
condition: Option[Expression] = None): LogicalPlan =
Join(logicalPlan, otherPlan, joinType, condition)
+ def cogroup[Key: Encoder, Left: Encoder, Right: Encoder, Result: Encoder](
+ otherPlan: LogicalPlan,
+ func: (Key, Iterator[Left], Iterator[Right]) => TraversableOnce[Result],
+ leftGroup: Seq[Attribute],
+ rightGroup: Seq[Attribute],
+ leftAttr: Seq[Attribute],
+ rightAttr: Seq[Attribute]
+ ): LogicalPlan = {
+ CoGroup.apply[Key, Left, Right, Result](
+ func,
+ leftGroup,
+ rightGroup,
+ leftAttr,
+ rightAttr,
+ logicalPlan,
+ otherPlan)
+ }
+
def orderBy(sortExprs: SortOrder*): LogicalPlan = Sort(sortExprs, true, logicalPlan)
def sortBy(sortExprs: SortOrder*): LogicalPlan = Sort(sortExprs, false, logicalPlan)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
index aceeb8aadc..45ac126a72 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
@@ -42,6 +42,9 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with Logging {
*/
def analyzed: Boolean = _analyzed
+ /** Returns true if this subtree contains any streaming data sources. */
+ def isStreaming: Boolean = children.exists(_.isStreaming)
+
/**
* Returns a copy of this node where `rule` has been recursively applied first to all of its
* children and then itself (post-order). When `rule` does not apply to a given node, it is left
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala
new file mode 100644
index 0000000000..ce00a03e76
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala
@@ -0,0 +1,384 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.analysis
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.catalyst.dsl.expressions._
+import org.apache.spark.sql.catalyst.dsl.plans._
+import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
+import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
+import org.apache.spark.sql.catalyst.plans._
+import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.sql.types.IntegerType
+
+class UnsupportedOperationsSuite extends SparkFunSuite {
+
+ val attribute = AttributeReference("a", IntegerType, nullable = true)()
+ val batchRelation = LocalRelation(attribute)
+ val streamRelation = new TestStreamingRelation(attribute)
+
+ /*
+ =======================================================================================
+ BATCH QUERIES
+ =======================================================================================
+ */
+
+ assertSupportedInBatchPlan("local relation", batchRelation)
+
+ assertNotSupportedInBatchPlan(
+ "streaming source",
+ streamRelation,
+ Seq("with streaming source", "startStream"))
+
+ assertNotSupportedInBatchPlan(
+ "select on streaming source",
+ streamRelation.select($"count(*)"),
+ Seq("with streaming source", "startStream"))
+
+
+ /*
+ =======================================================================================
+ STREAMING QUERIES
+ =======================================================================================
+ */
+
+ // Batch plan in streaming query
+ testError(
+ "streaming plan - no streaming source",
+ Seq("without streaming source", "startStream")) {
+ UnsupportedOperationChecker.checkForStreaming(batchRelation.select($"count(*)"), Append)
+ }
+
+ // Commands
+ assertNotSupportedInStreamingPlan(
+ "commmands",
+ DescribeFunction("func", true),
+ outputMode = Append,
+ expectedMsgs = "commands" :: Nil)
+
+ // Aggregates: Not supported on streams in Append mode
+ assertSupportedInStreamingPlan(
+ "aggregate - batch with update output mode",
+ batchRelation.groupBy("a")("count(*)"),
+ outputMode = Update)
+
+ assertSupportedInStreamingPlan(
+ "aggregate - batch with append output mode",
+ batchRelation.groupBy("a")("count(*)"),
+ outputMode = Append)
+
+ assertSupportedInStreamingPlan(
+ "aggregate - stream with update output mode",
+ streamRelation.groupBy("a")("count(*)"),
+ outputMode = Update)
+
+ assertNotSupportedInStreamingPlan(
+ "aggregate - stream with append output mode",
+ streamRelation.groupBy("a")("count(*)"),
+ outputMode = Append,
+ Seq("aggregation", "append output mode"))
+
+ // Inner joins: Stream-stream not supported
+ testBinaryOperationInStreamingPlan(
+ "inner join",
+ _.join(_, joinType = Inner),
+ streamStreamSupported = false)
+
+ // Full outer joins: only batch-batch is allowed
+ testBinaryOperationInStreamingPlan(
+ "full outer join",
+ _.join(_, joinType = FullOuter),
+ streamStreamSupported = false,
+ batchStreamSupported = false,
+ streamBatchSupported = false)
+
+ // Left outer joins: *-stream not allowed
+ testBinaryOperationInStreamingPlan(
+ "left outer join",
+ _.join(_, joinType = LeftOuter),
+ streamStreamSupported = false,
+ batchStreamSupported = false,
+ expectedMsg = "left outer/semi/anti joins")
+
+ // Left semi joins: *-stream not allowed
+ testBinaryOperationInStreamingPlan(
+ "left semi join",
+ _.join(_, joinType = LeftSemi),
+ streamStreamSupported = false,
+ batchStreamSupported = false,
+ expectedMsg = "left outer/semi/anti joins")
+
+ // Left anti joins: *-stream not allowed
+ testBinaryOperationInStreamingPlan(
+ "left anti join",
+ _.join(_, joinType = LeftAnti),
+ streamStreamSupported = false,
+ batchStreamSupported = false,
+ expectedMsg = "left outer/semi/anti joins")
+
+ // Right outer joins: stream-* not allowed
+ testBinaryOperationInStreamingPlan(
+ "right outer join",
+ _.join(_, joinType = RightOuter),
+ streamStreamSupported = false,
+ streamBatchSupported = false)
+
+ // Cogroup: only batch-batch is allowed
+ testBinaryOperationInStreamingPlan(
+ "cogroup",
+ genCogroup,
+ streamStreamSupported = false,
+ batchStreamSupported = false,
+ streamBatchSupported = false)
+
+ def genCogroup(left: LogicalPlan, right: LogicalPlan): LogicalPlan = {
+ def func(k: Int, left: Iterator[Int], right: Iterator[Int]): Iterator[Int] = {
+ Iterator.empty
+ }
+ implicit val intEncoder = ExpressionEncoder[Int]
+
+ left.cogroup[Int, Int, Int, Int](
+ right,
+ func,
+ AppendColumns[Int, Int]((x: Int) => x, left).newColumns,
+ AppendColumns[Int, Int]((x: Int) => x, right).newColumns,
+ left.output,
+ right.output)
+ }
+
+ // Union: Mixing between stream and batch not supported
+ testBinaryOperationInStreamingPlan(
+ "union",
+ _.union(_),
+ streamBatchSupported = false,
+ batchStreamSupported = false)
+
+ // Except: *-stream not supported
+ testBinaryOperationInStreamingPlan(
+ "except",
+ _.except(_),
+ streamStreamSupported = false,
+ batchStreamSupported = false)
+
+ // Intersect: stream-stream not supported
+ testBinaryOperationInStreamingPlan(
+ "intersect",
+ _.intersect(_),
+ streamStreamSupported = false)
+
+
+ // Unary operations
+ testUnaryOperatorInStreamingPlan("sort", Sort(Nil, true, _))
+ testUnaryOperatorInStreamingPlan("sort partitions", SortPartitions(Nil, _), expectedMsg = "sort")
+ testUnaryOperatorInStreamingPlan(
+ "sample", Sample(0.1, 1, true, 1L, _)(), expectedMsg = "sampling")
+ testUnaryOperatorInStreamingPlan(
+ "window", Window(Nil, Nil, Nil, _), expectedMsg = "non-time-based windows")
+
+
+ /*
+ =======================================================================================
+ TESTING FUNCTIONS
+ =======================================================================================
+ */
+
+ /**
+ * Test that a unary operator correctly fails the support check when it has a streaming child plan,
+ * but not when it has a batch child plan. There can be batch sub-plans inside a streaming plan,
+ * so it is valid for the operator to have a batch child plan.
+ *
+ * This test wraps the logical plan in a fake operator that makes the whole plan look like
+ * a streaming plan even if the child plan is a batch plan. This is to test that the operator
+ * supports having a batch child plan, forming a batch subplan inside a streaming plan.
+ */
+ def testUnaryOperatorInStreamingPlan(
+ operationName: String,
+ logicalPlanGenerator: LogicalPlan => LogicalPlan,
+ outputMode: OutputMode = Append,
+ expectedMsg: String = ""): Unit = {
+
+ val expectedMsgs = if (expectedMsg.isEmpty) Seq(operationName) else Seq(expectedMsg)
+
+ assertNotSupportedInStreamingPlan(
+ s"$operationName with stream relation",
+ wrapInStreaming(logicalPlanGenerator(streamRelation)),
+ outputMode,
+ expectedMsgs)
+
+ assertSupportedInStreamingPlan(
+ s"$operationName with batch relation",
+ wrapInStreaming(logicalPlanGenerator(batchRelation)),
+ outputMode)
+ }
+
+
+ /**
+ * Test that a binary operator correctly fails the support check when it has combinations of
+ * streaming and batch child plans. There can be batch sub-plans inside a streaming plan,
+ * so it is valid for the operator to have a batch child plan.
+ */
+ def testBinaryOperationInStreamingPlan(
+ operationName: String,
+ planGenerator: (LogicalPlan, LogicalPlan) => LogicalPlan,
+ outputMode: OutputMode = Append,
+ streamStreamSupported: Boolean = true,
+ streamBatchSupported: Boolean = true,
+ batchStreamSupported: Boolean = true,
+ expectedMsg: String = ""): Unit = {
+
+ val expectedMsgs = if (expectedMsg.isEmpty) Seq(operationName) else Seq(expectedMsg)
+
+ if (streamStreamSupported) {
+ assertSupportedInStreamingPlan(
+ s"$operationName with stream-stream relations",
+ planGenerator(streamRelation, streamRelation),
+ outputMode)
+ } else {
+ assertNotSupportedInStreamingPlan(
+ s"$operationName with stream-stream relations",
+ planGenerator(streamRelation, streamRelation),
+ outputMode,
+ expectedMsgs)
+ }
+
+ if (streamBatchSupported) {
+ assertSupportedInStreamingPlan(
+ s"$operationName with stream-batch relations",
+ planGenerator(streamRelation, batchRelation),
+ outputMode)
+ } else {
+ assertNotSupportedInStreamingPlan(
+ s"$operationName with stream-batch relations",
+ planGenerator(streamRelation, batchRelation),
+ outputMode,
+ expectedMsgs)
+ }
+
+ if (batchStreamSupported) {
+ assertSupportedInStreamingPlan(
+ s"$operationName with batch-stream relations",
+ planGenerator(batchRelation, streamRelation),
+ outputMode)
+ } else {
+ assertNotSupportedInStreamingPlan(
+ s"$operationName with batch-stream relations",
+ planGenerator(batchRelation, streamRelation),
+ outputMode,
+ expectedMsgs)
+ }
+
+ assertSupportedInStreamingPlan(
+ s"$operationName with batch-batch relations",
+ planGenerator(batchRelation, batchRelation),
+ outputMode)
+ }
+
+ /**
+ * Assert that the logical plan is supported as a subplan inside a streaming plan.
+ *
+ * To test this correctly, the given logical plan is wrapped in a fake operator that makes the
+ * whole plan look like a streaming plan. Otherwise, a batch plan may throw a not-supported
+ * exception simply for not being a streaming plan, even though that plan could exist as a batch
+ * subplan inside some streaming plan.
+ */
+ def assertSupportedInStreamingPlan(
+ name: String,
+ plan: LogicalPlan,
+ outputMode: OutputMode): Unit = {
+ test(s"streaming plan - $name: supported") {
+ UnsupportedOperationChecker.checkForStreaming(wrapInStreaming(plan), outputMode)
+ }
+ }
+
+ /**
+ * Assert that the logical plan is not supported inside a streaming plan.
+ *
+ * To test this correctly, the given logical plan is wrapped in a fake operator that makes the
+ * whole plan look like a streaming plan. Otherwise, a batch plan may throw a not-supported
+ * exception simply for not being a streaming plan, even though that plan could exist as a batch
+ * subplan inside some streaming plan.
+ */
+ def assertNotSupportedInStreamingPlan(
+ name: String,
+ plan: LogicalPlan,
+ outputMode: OutputMode,
+ expectedMsgs: Seq[String]): Unit = {
+ testError(
+ s"streaming plan - $name: not supported",
+ expectedMsgs :+ "streaming" :+ "DataFrame" :+ "Dataset" :+ "not supported") {
+ UnsupportedOperationChecker.checkForStreaming(wrapInStreaming(plan), outputMode)
+ }
+ }
+
+ /** Assert that the logical plan is supported as a batch plan */
+ def assertSupportedInBatchPlan(name: String, plan: LogicalPlan): Unit = {
+ test(s"batch plan - $name: supported") {
+ UnsupportedOperationChecker.checkForBatch(plan)
+ }
+ }
+
+ /** Assert that the logical plan is not supported as a batch plan */
+ def assertNotSupportedInBatchPlan(
+ name: String,
+ plan: LogicalPlan,
+ expectedMsgs: Seq[String]): Unit = {
+ testError(s"batch plan - $name: not supported", expectedMsgs) {
+ UnsupportedOperationChecker.checkForBatch(plan)
+ }
+ }
+
+ /**
+ * Test whether the body of code will fail. If it does fail, then check whether it contains the
+ * expected messages.
+ */
+ def testError(testName: String, expectedMsgs: Seq[String])(testBody: => Unit): Unit = {
+
+ test(testName) {
+ val e = intercept[AnalysisException] {
+ testBody
+ }
+
+ if (!expectedMsgs.map(_.toLowerCase).forall(e.getMessage.toLowerCase.contains)) {
+ fail(
+ s"""Exception message should contain the following substrings:
+ |
+ | ${expectedMsgs.mkString("\n ")}
+ |
+ |Actual exception message:
+ |
+ | ${e.getMessage}
+ """.stripMargin)
+ }
+ }
+ }
+
+ def wrapInStreaming(plan: LogicalPlan): LogicalPlan = {
+ new StreamingPlanWrapper(plan)
+ }
+
+ case class StreamingPlanWrapper(child: LogicalPlan) extends UnaryNode {
+ override def output: Seq[Attribute] = child.output
+ override def isStreaming: Boolean = true
+ }
+
+ case class TestStreamingRelation(output: Seq[Attribute]) extends LeafNode {
+ def this(attribute: Attribute) = this(Seq(attribute))
+ override def isStreaming: Boolean = true
+ }
+}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/LogicalPlanSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/LogicalPlanSuite.scala
index faef9ed274..cc86f1f6e2 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/LogicalPlanSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/LogicalPlanSuite.scala
@@ -18,7 +18,9 @@
package org.apache.spark.sql.catalyst.plans
import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.sql.types.IntegerType
/**
* This suite is used to test [[LogicalPlan]]'s `resolveOperators` and make sure it can correctly
@@ -68,4 +70,23 @@ class LogicalPlanSuite extends SparkFunSuite {
assert(invocationCount === 1)
}
+
+ test("isStreaming") {
+ val relation = LocalRelation(AttributeReference("a", IntegerType, nullable = true)())
+ val incrementalRelation = new LocalRelation(
+ Seq(AttributeReference("a", IntegerType, nullable = true)())) {
+ override def isStreaming(): Boolean = true
+ }
+
+ case class TestBinaryRelation(left: LogicalPlan, right: LogicalPlan) extends BinaryNode {
+ override def output: Seq[Attribute] = left.output ++ right.output
+ }
+
+ require(relation.isStreaming === false)
+ require(incrementalRelation.isStreaming === true)
+ assert(TestBinaryRelation(relation, relation).isStreaming === false)
+ assert(TestBinaryRelation(incrementalRelation, relation).isStreaming === true)
+ assert(TestBinaryRelation(relation, incrementalRelation).isStreaming === true)
+ assert(TestBinaryRelation(incrementalRelation, incrementalRelation).isStreaming)
+ }
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/ContinuousQueryManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/ContinuousQueryManager.scala
index 1343e81569..39d04ed8c2 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/ContinuousQueryManager.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/ContinuousQueryManager.scala
@@ -20,8 +20,10 @@ package org.apache.spark.sql
import scala.collection.mutable
import org.apache.spark.annotation.Experimental
+import org.apache.spark.sql.catalyst.analysis.{Append, OutputMode, UnsupportedOperationChecker}
import org.apache.spark.sql.execution.streaming._
import org.apache.spark.sql.execution.streaming.state.StateStoreCoordinatorRef
+import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.util.ContinuousQueryListener
/**
@@ -172,14 +174,23 @@ class ContinuousQueryManager(sqlContext: SQLContext) {
checkpointLocation: String,
df: DataFrame,
sink: Sink,
- trigger: Trigger = ProcessingTime(0)): ContinuousQuery = {
+ trigger: Trigger = ProcessingTime(0),
+ outputMode: OutputMode = Append): ContinuousQuery = {
activeQueriesLock.synchronized {
if (activeQueries.contains(name)) {
throw new IllegalArgumentException(
s"Cannot start query with name $name as a query with that name is already active")
}
+ val analyzedPlan = df.queryExecution.analyzed
+ df.queryExecution.assertAnalyzed()
+
+ if (sqlContext.conf.getConf(SQLConf.UNSUPPORTED_OPERATION_CHECK_ENABLED)) {
+ UnsupportedOperationChecker.checkForStreaming(analyzedPlan, outputMode)
+ }
+
var nextSourceId = 0L
- val logicalPlan = df.logicalPlan.transform {
+
+ val logicalPlan = analyzedPlan.transform {
case StreamingRelation(dataSource, _, output) =>
// Materialize source to avoid creating it in every batch
val metadataPath = s"$checkpointLocation/sources/$nextSourceId"
@@ -195,6 +206,7 @@ class ContinuousQueryManager(sqlContext: SQLContext) {
checkpointLocation,
logicalPlan,
sink,
+ outputMode,
trigger)
query.start()
activeQueries.put(name, query)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index fb3e184a64..1a09d70fb9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -461,9 +461,7 @@ class Dataset[T] private[sql](
* @since 2.0.0
*/
@Experimental
- def isStreaming: Boolean = logicalPlan.find { n =>
- n.isInstanceOf[StreamingRelation] || n.isInstanceOf[StreamingExecutionRelation]
- }.isDefined
+ def isStreaming: Boolean = logicalPlan.isStreaming
/**
* Displays the [[Dataset]] in a tabular form. Strings more than 20 characters will be truncated,
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
index f5e1e77263..ddcae0fe07 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
@@ -20,9 +20,11 @@ package org.apache.spark.sql.execution
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{AnalysisException, SQLContext}
import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.analysis.UnsupportedOperationChecker
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, ReturnAnswer}
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.execution.exchange.{EnsureRequirements, ReuseExchange}
+import org.apache.spark.sql.internal.SQLConf
/**
* The primary workflow for executing relational queries using Spark. Designed to allow easy
@@ -43,10 +45,17 @@ class QueryExecution(val sqlContext: SQLContext, val logical: LogicalPlan) {
throw ae
}
+ def assertSupported(): Unit = {
+ if (sqlContext.conf.getConf(SQLConf.UNSUPPORTED_OPERATION_CHECK_ENABLED)) {
+ UnsupportedOperationChecker.checkForBatch(analyzed)
+ }
+ }
+
lazy val analyzed: LogicalPlan = sqlContext.sessionState.analyzer.execute(logical)
lazy val withCachedData: LogicalPlan = {
assertAnalyzed()
+ assertSupported()
sqlContext.cacheManager.useCachedData(analyzed)
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala
index aaced49dd1..81244ed874 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala
@@ -18,9 +18,11 @@
package org.apache.spark.sql.execution.streaming
import org.apache.spark.sql.SQLContext
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.analysis.{OutputMode, UnsupportedOperationChecker}
+import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.execution.{QueryExecution, SparkPlan, SparkPlanner, UnaryNode}
+import org.apache.spark.sql.internal.SQLConf
/**
* A variant of [[QueryExecution]] that allows the execution of the given [[LogicalPlan]]
@@ -29,6 +31,7 @@ import org.apache.spark.sql.execution.{QueryExecution, SparkPlan, SparkPlanner,
class IncrementalExecution(
ctx: SQLContext,
logicalPlan: LogicalPlan,
+ outputMode: OutputMode,
checkpointLocation: String,
currentBatchId: Long) extends QueryExecution(ctx, logicalPlan) {
@@ -69,4 +72,7 @@ class IncrementalExecution(
}
override def preparations: Seq[Rule[SparkPlan]] = state +: super.preparations
+
+ /** No need to assert supported, as this check has already been done */
+ override def assertSupported(): Unit = { }
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala
index 87dd27a2b1..2a1fa1ba62 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala
@@ -27,6 +27,7 @@ import org.apache.hadoop.fs.Path
import org.apache.spark.internal.Logging
import org.apache.spark.sql._
+import org.apache.spark.sql.catalyst.analysis.OutputMode
import org.apache.spark.sql.catalyst.encoders.RowEncoder
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap}
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
@@ -48,6 +49,7 @@ class StreamExecution(
checkpointRoot: String,
private[sql] val logicalPlan: LogicalPlan,
val sink: Sink,
+ val outputMode: OutputMode,
val trigger: Trigger) extends ContinuousQuery with Logging {
/** An monitor used to wait/notify when batches complete. */
@@ -314,8 +316,13 @@ class StreamExecution(
}
val optimizerStart = System.nanoTime()
- lastExecution =
- new IncrementalExecution(sqlContext, newPlan, checkpointFile("state"), currentBatchId)
+ lastExecution = new IncrementalExecution(
+ sqlContext,
+ newPlan,
+ outputMode,
+ checkpointFile("state"),
+ currentBatchId)
+
lastExecution.executedPlan
val optimizerTime = (System.nanoTime() - optimizerStart).toDouble / 1000000
logDebug(s"Optimized batch in ${optimizerTime}ms")
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala
index d2872e49ce..c29291eb58 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala
@@ -37,6 +37,7 @@ object StreamingRelation {
*/
case class StreamingRelation(dataSource: DataSource, sourceName: String, output: Seq[Attribute])
extends LeafNode {
+ override def isStreaming: Boolean = true
override def toString: String = sourceName
}
@@ -45,6 +46,7 @@ case class StreamingRelation(dataSource: DataSource, sourceName: String, output:
* [[org.apache.spark.sql.catalyst.plans.logical.LogicalPlan]].
*/
case class StreamingExecutionRelation(source: Source, output: Seq[Attribute]) extends LeafNode {
+ override def isStreaming: Boolean = true
override def toString: String = source.toString
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 70e18cebdd..7f206bdb9b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -442,6 +442,14 @@ object SQLConf {
.stringConf
.createOptional
+ val UNSUPPORTED_OPERATION_CHECK_ENABLED =
+ SQLConfigBuilder("spark.sql.streaming.unsupportedOperationCheck")
+ .internal()
+ .doc("When true, the logical plan for continuous query will be checked for unsupported" +
+ " operations.")
+ .booleanConf
+ .createWithDefault(true)
+
// TODO: This is still WIP and shouldn't be turned on without extensive test coverage
val COLUMNAR_AGGREGATE_MAP_ENABLED = SQLConfigBuilder("spark.sql.codegen.aggregate.map.enabled")
.internal()
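
Note: the new flag is registered as an internal conf, but it can be toggled like any other SQL conf, which is how tests bypass the checker when they deliberately build plans it would reject. A minimal sketch, assuming an existing SQLContext named sqlContext:

    // Disable the new check, e.g. in a test that constructs an otherwise unsupported streaming plan.
    sqlContext.setConf("spark.sql.streaming.unsupportedOperationCheck", "false")
    // ... run the query under test ...
    // Re-enable it so subsequent queries are validated again.
    sqlContext.setConf("spark.sql.streaming.unsupportedOperationCheck", "true")
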
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StreamTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/StreamTest.scala
index 6ccc99fe17..242ea9cb27 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/StreamTest.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/StreamTest.scala
@@ -33,6 +33,7 @@ import org.scalatest.exceptions.TestFailedDueToTimeoutException
import org.scalatest.time.Span
import org.scalatest.time.SpanSugar._
+import org.apache.spark.sql.catalyst.analysis.{Append, OutputMode}
import org.apache.spark.sql.catalyst.encoders.{encoderFor, ExpressionEncoder, RowEncoder}
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.util._
@@ -75,6 +76,8 @@ trait StreamTest extends QueryTest with Timeouts {
/** How long to wait for an active stream to catch up when checking a result. */
val streamingTimeout = 10.seconds
+ val outputMode: OutputMode = Append
+
/** A trait for actions that can be performed while testing a streaming DataFrame. */
trait StreamAction
@@ -228,6 +231,7 @@ trait StreamTest extends QueryTest with Timeouts {
|$testActions
|
|== Stream ==
+ |Output Mode: $outputMode
|Stream state: $currentOffsets
|Thread state: $threadState
|${if (streamDeathCause != null) stackTraceToString(streamDeathCause) else ""}
@@ -235,6 +239,7 @@ trait StreamTest extends QueryTest with Timeouts {
|== Sink ==
|${sink.toDebugString}
|
+ |
|== Plan ==
|${if (currentStream != null) currentStream.lastExecution else ""}
""".stripMargin
@@ -293,7 +298,8 @@ trait StreamTest extends QueryTest with Timeouts {
StreamExecution.nextName,
metadataRoot,
stream,
- sink)
+ sink,
+ outputMode = outputMode)
.asInstanceOf[StreamExecution]
currentStream.microBatchThread.setUncaughtExceptionHandler(
new UncaughtExceptionHandler {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala
index 2bd27c7efd..6f3149dbc5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala
@@ -17,10 +17,9 @@
package org.apache.spark.sql.streaming
-import org.scalatest.concurrent.Eventually._
-
-import org.apache.spark.sql.{DataFrame, Row, SQLContext, StreamTest}
+import org.apache.spark.sql._
import org.apache.spark.sql.execution.streaming._
+import org.apache.spark.sql.functions._
import org.apache.spark.sql.sources.StreamSourceProvider
import org.apache.spark.sql.test.SharedSQLContext
import org.apache.spark.sql.types.{IntegerType, StructField, StructType}
@@ -108,6 +107,35 @@ class StreamSuite extends StreamTest with SharedSQLContext {
assertDF(df)
assertDF(df)
}
+
+ test("unsupported queries") {
+ val streamInput = MemoryStream[Int]
+ val batchInput = Seq(1, 2, 3).toDS()
+
+ def assertError(expectedMsgs: Seq[String])(body: => Unit): Unit = {
+ val e = intercept[AnalysisException] {
+ body
+ }
+ expectedMsgs.foreach { s => assert(e.getMessage.contains(s)) }
+ }
+
+ // Running a streaming plan as a batch query
+ assertError("startStream" :: Nil) {
+ streamInput.toDS.map { i => i }.count()
+ }
+
+ // Running a non-streaming plan as a streaming query
+ assertError("without streaming sources" :: "startStream" :: Nil) {
+ val ds = batchInput.map { i => i }
+ testStream(ds)()
+ }
+
+ // Running a streaming plan that cannot be incrementalized
+ assertError("not supported" :: "streaming" :: Nil) {
+ val ds = streamInput.toDS.map { i => i }.sort()
+ testStream(ds)()
+ }
+ }
}
/**
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala
index 3af7c01e52..fa3b122f6d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.streaming
import org.apache.spark.SparkException
import org.apache.spark.sql.StreamTest
+import org.apache.spark.sql.catalyst.analysis.Update
import org.apache.spark.sql.execution.streaming._
import org.apache.spark.sql.expressions.scala.typed
import org.apache.spark.sql.functions._
@@ -32,6 +33,8 @@ class StreamingAggregationSuite extends StreamTest with SharedSQLContext {
import testImplicits._
+ override val outputMode = Update
+
test("simple count") {
val inputData = MemoryStream[Int]