about summary refs log tree commit diff
path: root/sql/hive
diff options
context:
space:
mode:
author    Josh Rosen <joshrosen@databricks.com>    2015-08-25 00:04:10 -0700
committer Michael Armbrust <michael@databricks.com>    2015-08-25 00:04:10 -0700
commit  82268f07abfa658869df2354ae72f8d6ddd119e8 (patch)
tree    ffdb8d008d0314ab648412db96cb5cc1b65c5467 /sql/hive
parent  bf03fe68d62f33dda70dff45c3bda1f57b032dfc (diff)
downloadspark-82268f07abfa658869df2354ae72f8d6ddd119e8.tar.gz
spark-82268f07abfa658869df2354ae72f8d6ddd119e8.tar.bz2
spark-82268f07abfa658869df2354ae72f8d6ddd119e8.zip
[SPARK-9293] [SPARK-9813] Analysis should check that set operations are only performed on tables with equal numbers of columns
This patch adds an analyzer rule to ensure that set operations (union, intersect, and except) are only applied to tables with the same number of columns. Without this rule, there are scenarios where invalid queries can return incorrect results instead of failing with error messages; SPARK-9813 provides one example of this problem. In other cases, the invalid query can crash at runtime with extremely confusing exceptions. I also performed a bit of cleanup to refactor some of those logical operators' code into a common `SetOperation` base class. Author: Josh Rosen <joshrosen@databricks.com> Closes #7631 from JoshRosen/SPARK-9293.
Diffstat (limited to 'sql/hive')
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala           2
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala  2
2 files changed, 2 insertions, 2 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index bbe8c1911b..98d21aa76d 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -751,7 +751,7 @@ private[hive] case class InsertIntoHiveTable(
extends LogicalPlan {
override def children: Seq[LogicalPlan] = child :: Nil
- override def output: Seq[Attribute] = child.output
+ override def output: Seq[Attribute] = Seq.empty
val numDynamicPartitions = partition.values.count(_.isEmpty)
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
index 12c667e6e9..62efda613a 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
@@ -61,7 +61,7 @@ case class InsertIntoHiveTable(
serializer
}
- def output: Seq[Attribute] = child.output
+ def output: Seq[Attribute] = Seq.empty
def saveAsHiveFile(
rdd: RDD[InternalRow],