From dc1dbf206e0076a43ad2120d8bb5b1fc6912fe25 Mon Sep 17 00:00:00 2001
From: Cheng Lian <lian.cs.zju@gmail.com>
Date: Mon, 8 Sep 2014 19:08:05 -0700
Subject: [SPARK-3414][SQL] Stores analyzed logical plan when registering a
 temp table

Case insensitivity breaks when unresolved relation contains attributes with uppercase letters in their names, because we store unanalyzed logical plan when registering temp tables while the `CaseInsensitivityAttributeReferences` batch runs before the `Resolution` batch. To fix this issue, we need to store analyzed logical plan.

Author: Cheng Lian <lian.cs.zju@gmail.com>

Closes #2293 from liancheng/spark-3414 and squashes the following commits:

d9fa1d6 [Cheng Lian] Stores analyzed logical plan when registering a temp table
---
 .../scala/org/apache/spark/sql/SQLContext.scala    |  4 ++--
 .../spark/sql/hive/execution/HiveQuerySuite.scala  | 25 +++++++++++++++++++---
 2 files changed, 24 insertions(+), 5 deletions(-)

(limited to 'sql')

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 5acb45c155..a2f334aab9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -246,7 +246,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
    * @group userf
    */
   def registerRDDAsTable(rdd: SchemaRDD, tableName: String): Unit = {
-    catalog.registerTable(None, tableName, rdd.logicalPlan)
+    catalog.registerTable(None, tableName, rdd.queryExecution.analyzed)
   }
 
   /**
@@ -411,7 +411,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
     protected def stringOrError[A](f: => A): String =
       try f.toString catch { case e: Throwable => e.toString }
 
-    def simpleString: String = 
+    def simpleString: String =
       s"""== Physical Plan ==
          |${stringOrError(executedPlan)}
       """
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index f4217a52c3..305998c150 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -17,11 +17,8 @@
 
 package org.apache.spark.sql.hive.execution
 
-import java.io.File
-
 import scala.util.Try
 
-import org.apache.spark.SparkException
 import org.apache.spark.sql.hive._
 import org.apache.spark.sql.hive.test.TestHive
 import org.apache.spark.sql.hive.test.TestHive._
@@ -514,6 +511,28 @@ class HiveQuerySuite extends HiveComparisonTest {
     sql("DROP TABLE alter1")
   }
 
+  case class LogEntry(filename: String, message: String)
+  case class LogFile(name: String)
+
+  test("SPARK-3414 regression: should store analyzed logical plan when registering a temp table") {
+    sparkContext.makeRDD(Seq.empty[LogEntry]).registerTempTable("rawLogs")
+    sparkContext.makeRDD(Seq.empty[LogFile]).registerTempTable("logFiles")
+
+    sql(
+      """
+      SELECT name, message
+      FROM rawLogs
+      JOIN (
+        SELECT name
+        FROM logFiles
+      ) files
+      ON rawLogs.filename = files.name
+      """).registerTempTable("boom")
+
+    // This should be successfully analyzed
+    sql("SELECT * FROM boom").queryExecution.analyzed
+  }
+
   test("parse HQL set commands") {
     // Adapted from its SQL counterpart.
     val testKey = "spark.sql.key.usedfortestonly"
-- 
cgit v1.2.3