diff options
author | Cheng Lian <lian@databricks.com> | 2016-01-26 20:30:13 -0800 |
---|---|---|
committer | Yin Huai <yhuai@databricks.com> | 2016-01-26 20:30:13 -0800 |
commit | 58f5d8c1da6feeb598aa5f74ffe1593d4839d11d (patch) | |
tree | 635abe8a8aaad075cff00316b0a421c0259f84df /sql/core | |
parent | ce38a35b764397fcf561ac81de6da96579f5c13e (diff) | |
download | spark-58f5d8c1da6feeb598aa5f74ffe1593d4839d11d.tar.gz spark-58f5d8c1da6feeb598aa5f74ffe1593d4839d11d.tar.bz2 spark-58f5d8c1da6feeb598aa5f74ffe1593d4839d11d.zip |
[SPARK-12728][SQL] Integrates SQL generation with native view
This PR is a follow-up of PR #10541. It integrates the newly introduced SQL generation feature with native view to make native view canonical.
In this PR, a new SQL option `spark.sql.nativeView.canonical` is added. When this option and `spark.sql.nativeView` are both `true`, Spark SQL tries to handle `CREATE VIEW` DDL statements using SQL query strings generated from view definition logical plans. If we fail to map the plan to SQL, we fall back to the original native view approach.
One important issue this PR fixes is that we can now use CTEs when defining a view. Originally, when native view was turned on, we wrapped the view definition text with an extra `SELECT`. However, the HiveQL parser doesn't allow a CTE to appear inside a subquery. Namely, something like this is disallowed:
```sql
SELECT n
FROM (
WITH w AS (SELECT 1 AS n)
SELECT * FROM w
) v
```
This PR fixes this issue because the extra `SELECT` is no longer needed (also, CTE expressions are inlined as subqueries during the analysis phase, so there won't be any CTE expressions in the generated SQL query string).
Author: Cheng Lian <lian@databricks.com>
Author: Yin Huai <yhuai@databricks.com>
Closes #10733 from liancheng/spark-12728.integrate-sql-gen-with-native-view.
Diffstat (limited to 'sql/core')
-rw-r--r-- | sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala | 10 | ||||
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala | 13 |
2 files changed, 23 insertions, 0 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala index 2d664d3ee6..c9ba670099 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala @@ -367,6 +367,14 @@ private[spark] object SQLConf { "possible, or you may get wrong result.", isPublic = false) + val CANONICAL_NATIVE_VIEW = booleanConf("spark.sql.nativeView.canonical", + defaultValue = Some(true), + doc = "When this option and spark.sql.nativeView are both true, Spark SQL tries to handle " + + "CREATE VIEW statement using SQL query string generated from view definition logical " + + "plan. If the logical plan doesn't have a SQL representation, we fallback to the " + + "original native view implementation.", + isPublic = false) + val COLUMN_NAME_OF_CORRUPT_RECORD = stringConf("spark.sql.columnNameOfCorruptRecord", defaultValue = Some("_corrupt_record"), doc = "The name of internal column for storing raw/un-parsed JSON records that fail to parse.") @@ -550,6 +558,8 @@ private[sql] class SQLConf extends Serializable with CatalystConf with ParserCon private[spark] def wholeStageEnabled: Boolean = getConf(WHOLESTAGE_CODEGEN_ENABLED) + private[spark] def canonicalView: Boolean = getConf(CANONICAL_NATIVE_VIEW) + def caseSensitiveAnalysis: Boolean = getConf(SQLConf.CASE_SENSITIVE) private[spark] def subexpressionEliminationEnabled: Boolean = diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala index 5f73d71d45..d48143762c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala @@ -155,8 +155,21 @@ private[sql] trait SQLTestUtils } /** + * Drops view `viewName` after calling `f`. 
+ */ + protected def withView(viewNames: String*)(f: => Unit): Unit = { + try f finally { + viewNames.foreach { name => + sqlContext.sql(s"DROP VIEW IF EXISTS $name") + } + } + } + + /** * Creates a temporary database and switches current database to it before executing `f`. This * database is dropped after `f` returns. + * + * Note that this method doesn't switch current database before executing `f`. */ protected def withTempDatabase(f: String => Unit): Unit = { val dbName = s"db_${UUID.randomUUID().toString.replace('-', '_')}" |