diff options
author | Yin Huai <yhuai@databricks.com> | 2016-06-19 21:45:53 -0700 |
---|---|---|
committer | Yin Huai <yhuai@databricks.com> | 2016-06-19 21:45:53 -0700 |
commit | 6d0f921aedfdd3b7e8472b6776d0c7d8299190bd (patch) | |
tree | 29f6f34219596d37c44927ff386a77550854bf41 /sql/hive | |
parent | 4f17fddcd57adeae0d7e31bd14423283d4b625e9 (diff) | |
download | spark-6d0f921aedfdd3b7e8472b6776d0c7d8299190bd.tar.gz spark-6d0f921aedfdd3b7e8472b6776d0c7d8299190bd.tar.bz2 spark-6d0f921aedfdd3b7e8472b6776d0c7d8299190bd.zip |
[SPARK-16036][SPARK-16037][SPARK-16034][SQL] Follow up code clean up and improvement
## What changes were proposed in this pull request?
This PR is the follow-up PR for https://github.com/apache/spark/pull/13754/files and https://github.com/apache/spark/pull/13749. I will comment inline to explain my changes.
## How was this patch tested?
Existing tests.
Author: Yin Huai <yhuai@databricks.com>
Closes #13766 from yhuai/caseSensitivity.
Diffstat (limited to 'sql/hive')
4 files changed, 56 insertions, 33 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala index 2d286715b5..f6675f0904 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala @@ -65,7 +65,7 @@ private[hive] class HiveSessionState(sparkSession: SparkSession) catalog.ParquetConversions :: catalog.OrcConversions :: catalog.CreateTables :: - PreprocessTableInsertion :: + PreprocessTableInsertion(conf) :: DataSourceAnalysis :: (if (conf.runSQLonFile) new ResolveDataSource(sparkSession) :: Nil else Nil) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala index c48735142d..d4ebd051d2 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala @@ -372,4 +372,24 @@ class InsertIntoHiveTableSuite extends QueryTest with TestHiveSingleton with Bef assert(!logical.resolved, "Should not resolve: missing partition data") } } + + testPartitionedTable( + "SPARK-16036: better error message when insert into a table with mismatch schema") { + tableName => + val e = intercept[AnalysisException] { + sql(s"INSERT INTO TABLE $tableName PARTITION(b=1, c=2) SELECT 1, 2, 3") + } + assert(e.message.contains("the number of columns are different")) + } + + testPartitionedTable( + "SPARK-16037: INSERT statement should match columns by position") { + tableName => + withSQLConf("hive.exec.dynamic.partition.mode" -> "nonstrict") { + sql(s"INSERT INTO TABLE $tableName SELECT 1, 2 AS c, 3 AS b") + checkAnswer(sql(s"SELECT a, b, c FROM $tableName"), Row(1, 2, 3)) + sql(s"INSERT OVERWRITE TABLE $tableName SELECT 1, 2, 3") + checkAnswer(sql(s"SELECT a, b, c FROM $tableName"), Row(1, 2, 3)) + } + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index a16b5b2e23..85b159e2a5 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -1033,6 +1033,41 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter { sql("SELECT * FROM boom").queryExecution.analyzed } + test("SPARK-3810: PreprocessTableInsertion static partitioning support") { + val analyzedPlan = { + loadTestTable("srcpart") + sql("DROP TABLE IF EXISTS withparts") + sql("CREATE TABLE withparts LIKE srcpart") + sql("INSERT INTO TABLE withparts PARTITION(ds='1', hr='2') SELECT key, value FROM src") + .queryExecution.analyzed + } + + assertResult(1, "Duplicated project detected\n" + analyzedPlan) { + analyzedPlan.collect { + case _: Project => () + }.size + } + } + + test("SPARK-3810: PreprocessTableInsertion dynamic partitioning support") { + val analyzedPlan = { + loadTestTable("srcpart") + sql("DROP TABLE IF EXISTS withparts") + sql("CREATE TABLE withparts LIKE srcpart") + sql("SET hive.exec.dynamic.partition.mode=nonstrict") + + sql("CREATE TABLE IF NOT EXISTS withparts LIKE srcpart") + sql("INSERT INTO TABLE withparts PARTITION(ds, hr) SELECT key, value, '1', '2' FROM src") + .queryExecution.analyzed + } + + assertResult(2, "Duplicated project detected\n" + analyzedPlan) { + analyzedPlan.collect { + case _: Project => () + }.size + } + } + test("parse HQL set commands") { // Adapted from its SQL counterpart. val testKey = "spark.sql.key.usedfortestonly" diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 46a77dd917..9c1f218253 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -1684,36 +1684,4 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { ) } } - - test("SPARK-16036: better error message when insert into a table with mismatch schema") { - withTable("hive_table", "datasource_table") { - sql("CREATE TABLE hive_table(a INT) PARTITIONED BY (b INT, c INT)") - sql("CREATE TABLE datasource_table(a INT, b INT, c INT) USING parquet PARTITIONED BY (b, c)") - val e1 = intercept[AnalysisException] { - sql("INSERT INTO TABLE hive_table PARTITION(b=1, c=2) SELECT 1, 2, 3") - } - assert(e1.message.contains("the number of columns are different")) - val e2 = intercept[AnalysisException] { - sql("INSERT INTO TABLE datasource_table PARTITION(b=1, c=2) SELECT 1, 2, 3") - } - assert(e2.message.contains("the number of columns are different")) - } - } - - test("SPARK-16037: INSERT statement should match columns by position") { - withTable("hive_table", "datasource_table") { - sql("CREATE TABLE hive_table(a INT) PARTITIONED BY (b INT, c INT)") - sql("CREATE TABLE datasource_table(a INT, b INT, c INT) USING parquet PARTITIONED BY (b, c)") - - withSQLConf("hive.exec.dynamic.partition.mode" -> "nonstrict") { - sql("INSERT INTO TABLE hive_table SELECT 1, 2 AS c, 3 AS b") - checkAnswer(sql("SELECT a, b, c FROM hive_table"), Row(1, 2, 3)) - sql("INSERT OVERWRITE TABLE hive_table SELECT 1, 2, 3") - checkAnswer(sql("SELECT a, b, c FROM hive_table"), Row(1, 2, 3)) - } - - sql("INSERT INTO TABLE datasource_table SELECT 1, 2 AS c, 3 AS b") - checkAnswer(sql("SELECT a, b, c FROM datasource_table"), Row(1, 2, 3)) - } - } } |