[SPARK-12728][SQL] Integrates SQL generation with native view

This PR is a follow-up of PR #10541. It integrates the newly introduced SQL generation feature with native view to make native view canonical.

In this PR, a new SQL option `spark.sql.nativeView.canonical` is added. When this option and `spark.sql.nativeView` are both `true`, Spark SQL tries to handle `CREATE VIEW` DDL statements using SQL query strings generated from view definition logical plans. If we fail to map the plan to SQL, we fall back to the original native view approach.

One important issue this PR fixes is that we can now use CTEs when defining a view. Originally, when native view is turned on, we wrap the view definition text with an extra `SELECT`. However, the HiveQL parser doesn't allow a CTE to appear as a subquery. Namely, something like this is disallowed:

```sql
SELECT n
FROM (
  WITH w AS (SELECT 1 AS n)
  SELECT * FROM w
) v
```

This PR fixes this issue because the extra `SELECT` is no longer needed (also, CTE expressions are inlined as subqueries during the analysis phase, thus there won't be CTE expressions in the generated SQL query string).

Author: Cheng Lian <lian@databricks.com>
Author: Yin Huai <yhuai@databricks.com>

Closes #10733 from liancheng/spark-12728.integrate-sql-gen-with-native-view.
author: Cheng Lian <lian@databricks.com> 2016-01-26 20:30:13 -0800
committer: Yin Huai <yhuai@databricks.com> 2016-01-26 20:30:13 -0800
commit: 58f5d8c1da6feeb598aa5f74ffe1593d4839d11d (patch)
tree: 635abe8a8aaad075cff00316b0a421c0259f84df /sql/core
parent: ce38a35b764397fcf561ac81de6da96579f5c13e (diff)
download: spark-58f5d8c1da6feeb598aa5f74ffe1593d4839d11d.tar.gz
spark-58f5d8c1da6feeb598aa5f74ffe1593d4839d11d.tar.bz2
spark-58f5d8c1da6feeb598aa5f74ffe1593d4839d11d.zip
2 files changed, 23 insertions, 0 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala
index 2d664d3ee6..c9ba670099 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala
@@ -367,6 +367,14 @@ private[spark] object SQLConf {
           "possible, or you may get wrong result.",
     isPublic = false)
 
+  val CANONICAL_NATIVE_VIEW = booleanConf("spark.sql.nativeView.canonical",
+    defaultValue = Some(true),
+    doc = "When this option and spark.sql.nativeView are both true, Spark SQL tries to handle " +
+          "CREATE VIEW statement using SQL query string generated from view definition logical " +
+          "plan.  If the logical plan doesn't have a SQL representation, we fallback to the " +
+          "original native view implementation.",
+    isPublic = false)
+
   val COLUMN_NAME_OF_CORRUPT_RECORD = stringConf("spark.sql.columnNameOfCorruptRecord",
     defaultValue = Some("_corrupt_record"),
     doc = "The name of internal column for storing raw/un-parsed JSON records that fail to parse.")
@@ -550,6 +558,8 @@ private[sql] class SQLConf extends Serializable with CatalystConf with ParserCon
 
   private[spark] def wholeStageEnabled: Boolean = getConf(WHOLESTAGE_CODEGEN_ENABLED)
 
+  private[spark] def canonicalView: Boolean = getConf(CANONICAL_NATIVE_VIEW)
+
   def caseSensitiveAnalysis: Boolean = getConf(SQLConf.CASE_SENSITIVE)
 
   private[spark] def subexpressionEliminationEnabled: Boolean =
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
index 5f73d71d45..d48143762c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
@@ -155,8 +155,21 @@ private[sql] trait SQLTestUtils
   }
 
   /**
+   * Drops every view named in `viewNames` after calling `f`, even if `f` throws.
+   */
+  protected def withView(viewNames: String*)(f: => Unit): Unit = {
+    try f finally {
+      viewNames.foreach { name =>
+        sqlContext.sql(s"DROP VIEW IF EXISTS $name")
+      }
+    }
+  }
+
+  /**
    * Creates a temporary database and switches current database to it before executing `f`.  This
    * database is dropped after `f` returns.
+   *
+   * Note that, despite the summary above, this method doesn't switch current database before executing `f`.
    */
   protected def withTempDatabase(f: String => Unit): Unit = {
     val dbName = s"db_${UUID.randomUUID().toString.replace('-', '_')}"
author	Cheng Lian <lian@databricks.com>	2016-01-26 20:30:13 -0800
committer	Yin Huai <yhuai@databricks.com>	2016-01-26 20:30:13 -0800
commit	58f5d8c1da6feeb598aa5f74ffe1593d4839d11d (patch)
tree	635abe8a8aaad075cff00316b0a421c0259f84df /sql/core
parent	ce38a35b764397fcf561ac81de6da96579f5c13e (diff)
download	spark-58f5d8c1da6feeb598aa5f74ffe1593d4839d11d.tar.gz spark-58f5d8c1da6feeb598aa5f74ffe1593d4839d11d.tar.bz2 spark-58f5d8c1da6feeb598aa5f74ffe1593d4839d11d.zip