From 2b10ebe6ac1cdc2c723cb47e4b88cfbf39e0de08 Mon Sep 17 00:00:00 2001
From: gatorsmile
Date: Wed, 10 Aug 2016 17:05:50 +0800
Subject: [SPARK-16185][SQL] Better Error Messages When Creating Table As Select Without Enabling Hive Support

#### What changes were proposed in this pull request?

When Hive support is not enabled, the following query fails with a confusing assertion error from the planner:

```Scala
sql("CREATE TABLE t2 SELECT a, b from t1")
```

```
java.lang.AssertionError: assertion failed: No plan for CreateTable CatalogTable(
	Table: `t2`
	Created: Tue Aug 09 23:45:32 PDT 2016
	Last Access: Wed Dec 31 15:59:59 PST 1969
	Type: MANAGED
	Provider: hive
	Storage(InputFormat: org.apache.hadoop.mapred.TextInputFormat, OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat)), ErrorIfExists
+- Relation[a#19L,b#20L] parquet
```

This PR issues a clearer error message instead:

```
Hive support is required to use CREATE Hive TABLE AS SELECT
```

#### How was this patch tested?

Added test cases in `DDLSuite.scala`.

Author: gatorsmile

Closes #13886 from gatorsmile/createCatalogedTableAsSelect.
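For illustration, a minimal sketch of how user code hits the new check. This snippet is not part of the patch: the session setup (`appName`, `master`) and the table names are hypothetical, and it assumes a build without Hive support:

```Scala
import org.apache.spark.sql.{AnalysisException, SparkSession}

// Hypothetical local session; note that enableHiveSupport() is NOT called.
val spark = SparkSession.builder()
  .appName("ctas-without-hive")
  .master("local")
  .getOrCreate()

spark.range(1).toDF("a").createOrReplaceTempView("t1")

// CTAS without a USING clause defaults to the "hive" provider (see
// "Provider: hive" in the error above), so the new HiveOnlyCheck rule
// rejects it during analysis with the clearer message.
try {
  spark.sql("CREATE TABLE t2 SELECT a FROM t1")
} catch {
  case e: AnalysisException => println(e.getMessage)
  // Hive support is required to use CREATE Hive TABLE AS SELECT
}

// A data source CTAS with an explicit provider still works without Hive.
spark.sql("CREATE TABLE t3 USING parquet SELECT a FROM t1")
```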
---
 .../spark/sql/execution/datasources/rules.scala    | 15 ++++++++++++
 .../apache/spark/sql/internal/SessionState.scala   |  3 ++-
 .../spark/sql/execution/command/DDLSuite.scala     | 28 ++++++++++++++++++++++
 3 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
index c133dda13e..fc8d8c3667 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
@@ -272,6 +272,21 @@ case class PreprocessTableInsertion(conf: SQLConf) extends Rule[LogicalPlan] {
   }
 }
 
+/**
+ * A rule to check whether the functions are supported only when Hive support is enabled
+ */
+object HiveOnlyCheck extends (LogicalPlan => Unit) {
+  def apply(plan: LogicalPlan): Unit = {
+    plan.foreach {
+      case CreateTable(tableDesc, _, Some(_))
+          if tableDesc.provider.get == "hive" =>
+        throw new AnalysisException("Hive support is required to use CREATE Hive TABLE AS SELECT")
+
+      case _ => // OK
+    }
+  }
+}
+
 /**
  * A rule to do various checks before inserting into or writing to a data source table.
  */
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
index 052bce0923..ab27381c06 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
@@ -117,7 +117,8 @@ private[sql] class SessionState(sparkSession: SparkSession) {
         DataSourceAnalysis(conf) ::
         (if (conf.runSQLonFile) new ResolveDataSource(sparkSession) :: Nil else Nil)
 
-      override val extendedCheckRules = Seq(datasources.PreWriteCheck(conf, catalog))
+      override val extendedCheckRules =
+        Seq(PreWriteCheck(conf, catalog), HiveOnlyCheck)
     }
   }
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index 53376c56f1..0eb3f2002d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -1578,6 +1578,34 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
       "WITH SERDEPROPERTIES ('spark.sql.sources.me'='anything')")
   }
 
+  test("Create Hive Table As Select") {
+    import testImplicits._
+    withTable("t", "t1") {
+      var e = intercept[AnalysisException] {
+        sql("CREATE TABLE t SELECT 1 as a, 1 as b")
+      }.getMessage
+      assert(e.contains("Hive support is required to use CREATE Hive TABLE AS SELECT"))
+
+      spark.range(1).select('id as 'a, 'id as 'b).write.saveAsTable("t1")
+      e = intercept[AnalysisException] {
+        sql("CREATE TABLE t SELECT a, b from t1")
+      }.getMessage
+      assert(e.contains("Hive support is required to use CREATE Hive TABLE AS SELECT"))
+    }
+  }
+
+  test("Create Data Source Table As Select") {
+    import testImplicits._
+    withTable("t", "t1", "t2") {
+      sql("CREATE TABLE t USING parquet SELECT 1 as a, 1 as b")
+      checkAnswer(spark.table("t"), Row(1, 1) :: Nil)
+
+      spark.range(1).select('id as 'a, 'id as 'b).write.saveAsTable("t1")
+      sql("CREATE TABLE t2 USING parquet SELECT a, b from t1")
+      checkAnswer(spark.table("t2"), spark.table("t1"))
+    }
+  }
+
   test("drop current database") {
     sql("CREATE DATABASE temp")
     sql("USE temp")
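For contrast, a minimal sketch of the intended happy path: with Hive support enabled, the same Hive CTAS should succeed rather than hit the check. The session setup below is illustrative, and it assumes the Hive classes (the spark-hive module) are on the classpath with a writable warehouse directory:

```Scala
import org.apache.spark.sql.SparkSession

// Hypothetical session WITH Hive support; enableHiveSupport() throws if the
// Hive classes are not available on the classpath.
val spark = SparkSession.builder()
  .appName("ctas-with-hive")
  .master("local")
  .enableHiveSupport()
  .getOrCreate()

spark.range(1).toDF("a").createOrReplaceTempView("t1")
spark.sql("CREATE TABLE t2 SELECT a FROM t1")  // a Hive CTAS; now succeeds
```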