From d0ca5797a8fc55a3046cdfad8860ba3a29f72b51 Mon Sep 17 00:00:00 2001
From: Reynold Xin <rxin@databricks.com>
Date: Sun, 24 Apr 2016 19:38:21 -0700
Subject: [SPARK-14876][SQL] SparkSession should be case insensitive by default

## What changes were proposed in this pull request?
This patch changes SparkSession to be case insensitive by default, in order to match other database systems.

## How was this patch tested?
N/A - I'm sure some tests will fail and I will need to fix those.

Author: Reynold Xin <rxin@databricks.com>

Closes #12643 from rxin/SPARK-14876.
---
 .../org/apache/spark/sql/internal/SQLConf.scala    |   4 +-
 .../scala/org/apache/spark/sql/JoinSuite.scala     | 270 +++++++++++----------
 .../org/apache/spark/sql/ListTablesSuite.scala     |  32 +--
 .../scala/org/apache/spark/sql/SQLQuerySuite.scala |  91 ++++---
 .../apache/spark/sql/hive/HiveSessionState.scala   |   1 -
 5 files changed, 208 insertions(+), 190 deletions(-)

(limited to 'sql')

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index acd85db7c0..bbc424cba6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -162,9 +162,9 @@ object SQLConf {
       .createWithDefault(true)
 
   val CASE_SENSITIVE = SQLConfigBuilder("spark.sql.caseSensitive")
-    .doc("Whether the query analyzer should be case sensitive or not. Default to case sensitive.")
+    .doc("Whether the query analyzer should be case sensitive or not. Default to case insensitive.")
     .booleanConf
-    .createWithDefault(true)
+    .createWithDefault(false)
 
   val PARQUET_SCHEMA_MERGING_ENABLED = SQLConfigBuilder("spark.sql.parquet.mergeSchema")
     .doc("When true, the Parquet data source merges schemas collected from all data files, " +
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala
index 9e5a41d57c..ef9bb7ea4f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala
@@ -149,25 +149,29 @@ class JoinSuite extends QueryTest with SharedSQLContext {
   }
 
   test("inner join where, one match per row") {
-    checkAnswer(
-      upperCaseData.join(lowerCaseData).where('n === 'N),
-      Seq(
-        Row(1, "A", 1, "a"),
-        Row(2, "B", 2, "b"),
-        Row(3, "C", 3, "c"),
-        Row(4, "D", 4, "d")
-      ))
+    withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
+      checkAnswer(
+        upperCaseData.join(lowerCaseData).where('n === 'N),
+        Seq(
+          Row(1, "A", 1, "a"),
+          Row(2, "B", 2, "b"),
+          Row(3, "C", 3, "c"),
+          Row(4, "D", 4, "d")
+        ))
+    }
   }
 
   test("inner join ON, one match per row") {
-    checkAnswer(
-      upperCaseData.join(lowerCaseData, $"n" === $"N"),
-      Seq(
-        Row(1, "A", 1, "a"),
-        Row(2, "B", 2, "b"),
-        Row(3, "C", 3, "c"),
-        Row(4, "D", 4, "d")
-      ))
+    withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
+      checkAnswer(
+        upperCaseData.join(lowerCaseData, $"n" === $"N"),
+        Seq(
+          Row(1, "A", 1, "a"),
+          Row(2, "B", 2, "b"),
+          Row(3, "C", 3, "c"),
+          Row(4, "D", 4, "d")
+        ))
+    }
   }
 
   test("inner join, where, multiple matches") {
@@ -210,129 +214,133 @@ class JoinSuite extends QueryTest with SharedSQLContext {
   }
 
   test("left outer join") {
-    checkAnswer(
-      upperCaseData.join(lowerCaseData, $"n" === $"N", "left"),
-      Row(1, "A", 1, "a") ::
-        Row(2, "B", 2, "b") ::
-        Row(3, "C", 3, "c") ::
-        Row(4, "D", 4, "d") ::
-        Row(5, "E", null, null) ::
-        Row(6, "F", null, null) :: Nil)
-
-    checkAnswer(
-      upperCaseData.join(lowerCaseData, $"n" === $"N" && $"n" > 1, "left"),
-      Row(1, "A", null, null) ::
-        Row(2, "B", 2, "b") ::
-        Row(3, "C", 3, "c") ::
-        Row(4, "D", 4, "d") ::
-        Row(5, "E", null, null) ::
-        Row(6, "F", null, null) :: Nil)
-
-    checkAnswer(
-      upperCaseData.join(lowerCaseData, $"n" === $"N" && $"N" > 1, "left"),
-      Row(1, "A", null, null) ::
-        Row(2, "B", 2, "b") ::
-        Row(3, "C", 3, "c") ::
-        Row(4, "D", 4, "d") ::
-        Row(5, "E", null, null) ::
-        Row(6, "F", null, null) :: Nil)
-
-    checkAnswer(
-      upperCaseData.join(lowerCaseData, $"n" === $"N" && $"l" > $"L", "left"),
-      Row(1, "A", 1, "a") ::
-        Row(2, "B", 2, "b") ::
-        Row(3, "C", 3, "c") ::
-        Row(4, "D", 4, "d") ::
-        Row(5, "E", null, null) ::
-        Row(6, "F", null, null) :: Nil)
-
-    // Make sure we are choosing left.outputPartitioning as the
-    // outputPartitioning for the outer join operator.
-    checkAnswer(
-      sql(
-        """
-        |SELECT l.N, count(*)
-        |FROM upperCaseData l LEFT OUTER JOIN allNulls r ON (l.N = r.a)
-        |GROUP BY l.N
-      """.
-          stripMargin),
-    Row(1, 1) ::
-      Row(2, 1) ::
-      Row(3, 1) ::
-      Row(4, 1) ::
-      Row(5, 1) ::
-      Row(6, 1) :: Nil)
-
-    checkAnswer(
-      sql(
-        """
-          |SELECT r.a, count(*)
-          |FROM upperCaseData l LEFT OUTER JOIN allNulls r ON (l.N = r.a)
-          |GROUP BY r.a
-        """.stripMargin),
-      Row(null, 6) :: Nil)
-  }
+    withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
+      checkAnswer(
+        upperCaseData.join(lowerCaseData, $"n" === $"N", "left"),
+        Row(1, "A", 1, "a") ::
+          Row(2, "B", 2, "b") ::
+          Row(3, "C", 3, "c") ::
+          Row(4, "D", 4, "d") ::
+          Row(5, "E", null, null) ::
+          Row(6, "F", null, null) :: Nil)
 
-  test("right outer join") {
-    checkAnswer(
-      lowerCaseData.join(upperCaseData, $"n" === $"N", "right"),
-      Row(1, "a", 1, "A") ::
-        Row(2, "b", 2, "B") ::
-        Row(3, "c", 3, "C") ::
-        Row(4, "d", 4, "D") ::
-        Row(null, null, 5, "E") ::
-        Row(null, null, 6, "F") :: Nil)
-    checkAnswer(
-      lowerCaseData.join(upperCaseData, $"n" === $"N" && $"n" > 1, "right"),
-      Row(null, null, 1, "A") ::
-        Row(2, "b", 2, "B") ::
-        Row(3, "c", 3, "C") ::
-        Row(4, "d", 4, "D") ::
-        Row(null, null, 5, "E") ::
-        Row(null, null, 6, "F") :: Nil)
-    checkAnswer(
-      lowerCaseData.join(upperCaseData, $"n" === $"N" && $"N" > 1, "right"),
-      Row(null, null, 1, "A") ::
-        Row(2, "b", 2, "B") ::
-        Row(3, "c", 3, "C") ::
-        Row(4, "d", 4, "D") ::
-        Row(null, null, 5, "E") ::
-        Row(null, null, 6, "F") :: Nil)
-    checkAnswer(
-      lowerCaseData.join(upperCaseData, $"n" === $"N" && $"l" > $"L", "right"),
-      Row(1, "a", 1, "A") ::
-        Row(2, "b", 2, "B") ::
-        Row(3, "c", 3, "C") ::
-        Row(4, "d", 4, "D") ::
-        Row(null, null, 5, "E") ::
-        Row(null, null, 6, "F") :: Nil)
+      checkAnswer(
+        upperCaseData.join(lowerCaseData, $"n" === $"N" && $"n" > 1, "left"),
+        Row(1, "A", null, null) ::
+          Row(2, "B", 2, "b") ::
+          Row(3, "C", 3, "c") ::
+          Row(4, "D", 4, "d") ::
+          Row(5, "E", null, null) ::
+          Row(6, "F", null, null) :: Nil)
 
-    // Make sure we are choosing right.outputPartitioning as the
-    // outputPartitioning for the outer join operator.
-    checkAnswer(
-      sql(
-        """
-          |SELECT l.a, count(*)
-          |FROM allNulls l RIGHT OUTER JOIN upperCaseData r ON (l.a = r.N)
-          |GROUP BY l.a
-        """.stripMargin),
-      Row(null,
-        6))
+      checkAnswer(
+        upperCaseData.join(lowerCaseData, $"n" === $"N" && $"N" > 1, "left"),
+        Row(1, "A", null, null) ::
+          Row(2, "B", 2, "b") ::
+          Row(3, "C", 3, "c") ::
+          Row(4, "D", 4, "d") ::
+          Row(5, "E", null, null) ::
+          Row(6, "F", null, null) :: Nil)
 
-    checkAnswer(
-      sql(
-        """
-          |SELECT r.N, count(*)
-          |FROM allNulls l RIGHT OUTER JOIN upperCaseData r ON (l.a = r.N)
-          |GROUP BY r.N
-        """.stripMargin),
-      Row(1
-        , 1) ::
+      checkAnswer(
+        upperCaseData.join(lowerCaseData, $"n" === $"N" && $"l" > $"L", "left"),
+        Row(1, "A", 1, "a") ::
+          Row(2, "B", 2, "b") ::
+          Row(3, "C", 3, "c") ::
+          Row(4, "D", 4, "d") ::
+          Row(5, "E", null, null) ::
+          Row(6, "F", null, null) :: Nil)
+
+      // Make sure we are choosing left.outputPartitioning as the
+      // outputPartitioning for the outer join operator.
+      checkAnswer(
+        sql(
+          """
+          |SELECT l.N, count(*)
+          |FROM uppercasedata l LEFT OUTER JOIN allnulls r ON (l.N = r.a)
+          |GROUP BY l.N
+          """.stripMargin),
+      Row(
+        1, 1) ::
         Row(2, 1) ::
         Row(3, 1) ::
         Row(4, 1) ::
         Row(5, 1) ::
         Row(6, 1) :: Nil)
+
+      checkAnswer(
+        sql(
+          """
+            |SELECT r.a, count(*)
+            |FROM uppercasedata l LEFT OUTER JOIN allnulls r ON (l.N = r.a)
+            |GROUP BY r.a
+          """.stripMargin),
+        Row(null, 6) :: Nil)
+    }
+  }
+
+  test("right outer join") {
+    withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
+      checkAnswer(
+        lowerCaseData.join(upperCaseData, $"n" === $"N", "right"),
+        Row(1, "a", 1, "A") ::
+          Row(2, "b", 2, "B") ::
+          Row(3, "c", 3, "C") ::
+          Row(4, "d", 4, "D") ::
+          Row(null, null, 5, "E") ::
+          Row(null, null, 6, "F") :: Nil)
+      checkAnswer(
+        lowerCaseData.join(upperCaseData, $"n" === $"N" && $"n" > 1, "right"),
+        Row(null, null, 1, "A") ::
+          Row(2, "b", 2, "B") ::
+          Row(3, "c", 3, "C") ::
+          Row(4, "d", 4, "D") ::
+          Row(null, null, 5, "E") ::
+          Row(null, null, 6, "F") :: Nil)
+      checkAnswer(
+        lowerCaseData.join(upperCaseData, $"n" === $"N" && $"N" > 1, "right"),
+        Row(null, null, 1, "A") ::
+          Row(2, "b", 2, "B") ::
+          Row(3, "c", 3, "C") ::
+          Row(4, "d", 4, "D") ::
+          Row(null, null, 5, "E") ::
+          Row(null, null, 6, "F") :: Nil)
+      checkAnswer(
+        lowerCaseData.join(upperCaseData, $"n" === $"N" && $"l" > $"L", "right"),
+        Row(1, "a", 1, "A") ::
+          Row(2, "b", 2, "B") ::
+          Row(3, "c", 3, "C") ::
+          Row(4, "d", 4, "D") ::
+          Row(null, null, 5, "E") ::
+          Row(null, null, 6, "F") :: Nil)
+
+      // Make sure we are choosing right.outputPartitioning as the
+      // outputPartitioning for the outer join operator.
+      checkAnswer(
+        sql(
+          """
+            |SELECT l.a, count(*)
+            |FROM allnulls l RIGHT OUTER JOIN uppercasedata r ON (l.a = r.N)
+            |GROUP BY l.a
+          """.stripMargin),
+        Row(null,
+          6))
+
+      checkAnswer(
+        sql(
+          """
+            |SELECT r.N, count(*)
+            |FROM allnulls l RIGHT OUTER JOIN uppercasedata r ON (l.a = r.N)
+            |GROUP BY r.N
+          """.stripMargin),
+        Row(1
+          , 1) ::
+          Row(2, 1) ::
+          Row(3, 1) ::
+          Row(4, 1) ::
+          Row(5, 1) ::
+          Row(6, 1) :: Nil)
+    }
   }
 
   test("full outer join") {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ListTablesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ListTablesSuite.scala
index bb54c525cb..9f6c86a575 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ListTablesSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ListTablesSuite.scala
@@ -29,40 +29,40 @@ class ListTablesSuite extends QueryTest with BeforeAndAfter with SharedSQLContex
   private lazy val df = (1 to 10).map(i => (i, s"str$i")).toDF("key", "value")
 
   before {
-    df.registerTempTable("ListTablesSuiteTable")
+    df.registerTempTable("listtablessuitetable")
   }
 
   after {
     sqlContext.sessionState.catalog.dropTable(
-      TableIdentifier("ListTablesSuiteTable"), ignoreIfNotExists = true)
+      TableIdentifier("listtablessuitetable"), ignoreIfNotExists = true)
   }
 
   test("get all tables") {
     checkAnswer(
-      sqlContext.tables().filter("tableName = 'ListTablesSuiteTable'"),
-      Row("ListTablesSuiteTable", true))
+      sqlContext.tables().filter("tableName = 'listtablessuitetable'"),
+      Row("listtablessuitetable", true))
 
     checkAnswer(
-      sql("SHOW tables").filter("tableName = 'ListTablesSuiteTable'"),
-      Row("ListTablesSuiteTable", true))
+      sql("SHOW tables").filter("tableName = 'listtablessuitetable'"),
+      Row("listtablessuitetable", true))
 
     sqlContext.sessionState.catalog.dropTable(
-      TableIdentifier("ListTablesSuiteTable"), ignoreIfNotExists = true)
-    assert(sqlContext.tables().filter("tableName = 'ListTablesSuiteTable'").count() === 0)
+      TableIdentifier("listtablessuitetable"), ignoreIfNotExists = true)
+    assert(sqlContext.tables().filter("tableName = 'listtablessuitetable'").count() === 0)
   }
 
   test("getting all tables with a database name has no impact on returned table names") {
     checkAnswer(
-      sqlContext.tables("default").filter("tableName = 'ListTablesSuiteTable'"),
-      Row("ListTablesSuiteTable", true))
+      sqlContext.tables("default").filter("tableName = 'listtablessuitetable'"),
+      Row("listtablessuitetable", true))
 
     checkAnswer(
-      sql("show TABLES in default").filter("tableName = 'ListTablesSuiteTable'"),
-      Row("ListTablesSuiteTable", true))
+      sql("show TABLES in default").filter("tableName = 'listtablessuitetable'"),
+      Row("listtablessuitetable", true))
 
     sqlContext.sessionState.catalog.dropTable(
-      TableIdentifier("ListTablesSuiteTable"), ignoreIfNotExists = true)
-    assert(sqlContext.tables().filter("tableName = 'ListTablesSuiteTable'").count() === 0)
+      TableIdentifier("listtablessuitetable"), ignoreIfNotExists = true)
+    assert(sqlContext.tables().filter("tableName = 'listtablessuitetable'").count() === 0)
   }
 
   test("query the returned DataFrame of tables") {
@@ -77,8 +77,8 @@ class ListTablesSuite extends QueryTest with BeforeAndAfter with SharedSQLContex
         tableDF.registerTempTable("tables")
         checkAnswer(
           sql(
-            "SELECT isTemporary, tableName from tables WHERE tableName = 'ListTablesSuiteTable'"),
-          Row(true, "ListTablesSuiteTable")
+            "SELECT isTemporary, tableName from tables WHERE tableName = 'listtablessuitetable'"),
+          Row(true, "listtablessuitetable")
         )
         checkAnswer(
           sqlContext.tables().filter("tableName = 'tables'").select("tableName", "isTemporary"),
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 29521afdd8..5065e5b80b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -754,36 +754,43 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
   }
 
   test("inner join where, one match per row") {
-    checkAnswer(
-      sql("SELECT * FROM upperCaseData JOIN lowerCaseData WHERE n = N"),
-      Seq(
-        Row(1, "A", 1, "a"),
-        Row(2, "B", 2, "b"),
-        Row(3, "C", 3, "c"),
-        Row(4, "D", 4, "d")))
+    withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
+      checkAnswer(
+        sql("SELECT * FROM uppercasedata JOIN lowercasedata WHERE n = N"),
+        Seq(
+          Row(1, "A", 1, "a"),
+          Row(2, "B", 2, "b"),
+          Row(3, "C", 3, "c"),
+          Row(4, "D", 4, "d")))
+    }
   }
 
   test("inner join ON, one match per row") {
-    checkAnswer(
-      sql("SELECT * FROM upperCaseData JOIN lowerCaseData ON n = N"),
-      Seq(
-        Row(1, "A", 1, "a"),
-        Row(2, "B", 2, "b"),
-        Row(3, "C", 3, "c"),
-        Row(4, "D", 4, "d")))
+    withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
+      checkAnswer(
+        sql("SELECT * FROM uppercasedata JOIN lowercasedata ON n = N"),
+        Seq(
+          Row(1, "A", 1, "a"),
+          Row(2, "B", 2, "b"),
+          Row(3, "C", 3, "c"),
+          Row(4, "D", 4, "d")))
+    }
   }
 
   test("inner join, where, multiple matches") {
-    checkAnswer(
-      sql("""
-        |SELECT * FROM
-        |  (SELECT * FROM testData2 WHERE a = 1) x JOIN
-        |  (SELECT * FROM testData2 WHERE a = 1) y
-        |WHERE x.a = y.a""".stripMargin),
-      Row(1, 1, 1, 1) ::
-      Row(1, 1, 1, 2) ::
-      Row(1, 2, 1, 1) ::
-      Row(1, 2, 1, 2) :: Nil)
+    withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
+      checkAnswer(
+        sql(
+          """
+          |SELECT * FROM
+          |  (SELECT * FROM testdata2 WHERE a = 1) x JOIN
+          |  (SELECT * FROM testdata2 WHERE a = 1) y
+          |WHERE x.a = y.a""".stripMargin),
+        Row(1, 1, 1, 1) ::
+        Row(1, 1, 1, 2) ::
+        Row(1, 2, 1, 1) ::
+        Row(1, 2, 1, 2) :: Nil)
+    }
   }
 
   test("inner join, no matches") {
@@ -825,25 +832,29 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
   }
 
   test("left outer join") {
-    checkAnswer(
-      sql("SELECT * FROM upperCaseData LEFT OUTER JOIN lowerCaseData ON n = N"),
-      Row(1, "A", 1, "a") ::
-      Row(2, "B", 2, "b") ::
-      Row(3, "C", 3, "c") ::
-      Row(4, "D", 4, "d") ::
-      Row(5, "E", null, null) ::
-      Row(6, "F", null, null) :: Nil)
+    withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
+      checkAnswer(
+        sql("SELECT * FROM uppercasedata LEFT OUTER JOIN lowercasedata ON n = N"),
+        Row(1, "A", 1, "a") ::
+          Row(2, "B", 2, "b") ::
+          Row(3, "C", 3, "c") ::
+          Row(4, "D", 4, "d") ::
+          Row(5, "E", null, null) ::
+          Row(6, "F", null, null) :: Nil)
+    }
   }
 
   test("right outer join") {
-    checkAnswer(
-      sql("SELECT * FROM lowerCaseData RIGHT OUTER JOIN upperCaseData ON n = N"),
-      Row(1, "a", 1, "A") ::
-      Row(2, "b", 2, "B") ::
-      Row(3, "c", 3, "C") ::
-      Row(4, "d", 4, "D") ::
-      Row(null, null, 5, "E") ::
-      Row(null, null, 6, "F") :: Nil)
+    withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
+      checkAnswer(
+        sql("SELECT * FROM lowercasedata RIGHT OUTER JOIN uppercasedata ON n = N"),
+        Row(1, "a", 1, "A") ::
+          Row(2, "b", 2, "B") ::
+          Row(3, "c", 3, "C") ::
+          Row(4, "d", 4, "D") ::
+          Row(null, null, 5, "E") ::
+          Row(null, null, 6, "F") :: Nil)
+    }
   }
 
   test("full outer join") {
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
index 636bfdebf7..64b9d8424a 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
@@ -139,7 +139,6 @@ private[hive] class HiveSessionState(ctx: SQLContext) extends SessionState(ctx)
    */
   def setDefaultOverrideConfs(): Unit = {
     setConf(ConfVars.HIVE_SUPPORT_SQL11_RESERVED_KEYWORDS.varname, "false")
-    conf.setConfString("spark.sql.caseSensitive", "false")
   }
 
   override def setConf(key: String, value: String): Unit = {
-- 
cgit v1.2.3