From d0ca5797a8fc55a3046cdfad8860ba3a29f72b51 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Sun, 24 Apr 2016 19:38:21 -0700 Subject: [SPARK-14876][SQL] SparkSession should be case insensitive by default ## What changes were proposed in this pull request? This patch changes SparkSession to be case insensitive by default, in order to match other database systems. ## How was this patch tested? N/A - I'm sure some tests will fail and I will need to fix those. Author: Reynold Xin Closes #12643 from rxin/SPARK-14876. --- .../org/apache/spark/sql/internal/SQLConf.scala | 4 +- .../scala/org/apache/spark/sql/JoinSuite.scala | 270 +++++++++++---------- .../org/apache/spark/sql/ListTablesSuite.scala | 32 +-- .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 91 ++++--- .../apache/spark/sql/hive/HiveSessionState.scala | 1 - 5 files changed, 208 insertions(+), 190 deletions(-) (limited to 'sql') diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index acd85db7c0..bbc424cba6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -162,9 +162,9 @@ object SQLConf { .createWithDefault(true) val CASE_SENSITIVE = SQLConfigBuilder("spark.sql.caseSensitive") - .doc("Whether the query analyzer should be case sensitive or not. Default to case sensitive.") + .doc("Whether the query analyzer should be case sensitive or not. Default to case insensitive.") .booleanConf - .createWithDefault(true) + .createWithDefault(false) val PARQUET_SCHEMA_MERGING_ENABLED = SQLConfigBuilder("spark.sql.parquet.mergeSchema") .doc("When true, the Parquet data source merges schemas collected from all data files, " + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala index 9e5a41d57c..ef9bb7ea4f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala @@ -149,25 +149,29 @@ class JoinSuite extends QueryTest with SharedSQLContext { } test("inner join where, one match per row") { - checkAnswer( - upperCaseData.join(lowerCaseData).where('n === 'N), - Seq( - Row(1, "A", 1, "a"), - Row(2, "B", 2, "b"), - Row(3, "C", 3, "c"), - Row(4, "D", 4, "d") - )) + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + checkAnswer( + upperCaseData.join(lowerCaseData).where('n === 'N), + Seq( + Row(1, "A", 1, "a"), + Row(2, "B", 2, "b"), + Row(3, "C", 3, "c"), + Row(4, "D", 4, "d") + )) + } } test("inner join ON, one match per row") { - checkAnswer( - upperCaseData.join(lowerCaseData, $"n" === $"N"), - Seq( - Row(1, "A", 1, "a"), - Row(2, "B", 2, "b"), - Row(3, "C", 3, "c"), - Row(4, "D", 4, "d") - )) + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + checkAnswer( + upperCaseData.join(lowerCaseData, $"n" === $"N"), + Seq( + Row(1, "A", 1, "a"), + Row(2, "B", 2, "b"), + Row(3, "C", 3, "c"), + Row(4, "D", 4, "d") + )) + } } test("inner join, where, multiple matches") { @@ -210,129 +214,133 @@ class JoinSuite extends QueryTest with SharedSQLContext { } test("left outer join") { - checkAnswer( - upperCaseData.join(lowerCaseData, $"n" === $"N", "left"), - Row(1, "A", 1, "a") :: - Row(2, "B", 2, "b") :: - Row(3, "C", 3, "c") :: - Row(4, "D", 4, "d") :: - Row(5, "E", null, null) :: - Row(6, "F", null, null) :: Nil) - - checkAnswer( - upperCaseData.join(lowerCaseData, $"n" === $"N" && $"n" > 1, "left"), - Row(1, "A", null, null) :: - Row(2, "B", 2, "b") :: - Row(3, "C", 3, "c") :: - Row(4, "D", 4, "d") :: - Row(5, "E", null, null) :: - Row(6, "F", null, null) :: Nil) - - checkAnswer( - upperCaseData.join(lowerCaseData, $"n" === $"N" && $"N" > 1, "left"), - Row(1, "A", null, null) :: - Row(2, "B", 2, "b") :: - Row(3, "C", 3, "c") :: - Row(4, "D", 4, "d") :: - Row(5, "E", null, null) :: - Row(6, "F", null, null) :: Nil) - - checkAnswer( - upperCaseData.join(lowerCaseData, $"n" === $"N" && $"l" > $"L", "left"), - Row(1, "A", 1, "a") :: - Row(2, "B", 2, "b") :: - Row(3, "C", 3, "c") :: - Row(4, "D", 4, "d") :: - Row(5, "E", null, null) :: - Row(6, "F", null, null) :: Nil) - - // Make sure we are choosing left.outputPartitioning as the - // outputPartitioning for the outer join operator. - checkAnswer( - sql( - """ - |SELECT l.N, count(*) - |FROM upperCaseData l LEFT OUTER JOIN allNulls r ON (l.N = r.a) - |GROUP BY l.N - """. - stripMargin), - Row(1, 1) :: - Row(2, 1) :: - Row(3, 1) :: - Row(4, 1) :: - Row(5, 1) :: - Row(6, 1) :: Nil) - - checkAnswer( - sql( - """ - |SELECT r.a, count(*) - |FROM upperCaseData l LEFT OUTER JOIN allNulls r ON (l.N = r.a) - |GROUP BY r.a - """.stripMargin), - Row(null, 6) :: Nil) - } + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + checkAnswer( + upperCaseData.join(lowerCaseData, $"n" === $"N", "left"), + Row(1, "A", 1, "a") :: + Row(2, "B", 2, "b") :: + Row(3, "C", 3, "c") :: + Row(4, "D", 4, "d") :: + Row(5, "E", null, null) :: + Row(6, "F", null, null) :: Nil) - test("right outer join") { - checkAnswer( - lowerCaseData.join(upperCaseData, $"n" === $"N", "right"), - Row(1, "a", 1, "A") :: - Row(2, "b", 2, "B") :: - Row(3, "c", 3, "C") :: - Row(4, "d", 4, "D") :: - Row(null, null, 5, "E") :: - Row(null, null, 6, "F") :: Nil) - checkAnswer( - lowerCaseData.join(upperCaseData, $"n" === $"N" && $"n" > 1, "right"), - Row(null, null, 1, "A") :: - Row(2, "b", 2, "B") :: - Row(3, "c", 3, "C") :: - Row(4, "d", 4, "D") :: - Row(null, null, 5, "E") :: - Row(null, null, 6, "F") :: Nil) - checkAnswer( - lowerCaseData.join(upperCaseData, $"n" === $"N" && $"N" > 1, "right"), - Row(null, null, 1, "A") :: - Row(2, "b", 2, "B") :: - Row(3, "c", 3, "C") :: - Row(4, "d", 4, "D") :: - Row(null, null, 5, "E") :: - Row(null, null, 6, "F") :: Nil) - checkAnswer( - lowerCaseData.join(upperCaseData, $"n" === $"N" && $"l" > $"L", "right"), - Row(1, "a", 1, "A") :: - Row(2, "b", 2, "B") :: - Row(3, "c", 3, "C") :: - Row(4, "d", 4, "D") :: - Row(null, null, 5, "E") :: - Row(null, null, 6, "F") :: Nil) + checkAnswer( + upperCaseData.join(lowerCaseData, $"n" === $"N" && $"n" > 1, "left"), + Row(1, "A", null, null) :: + Row(2, "B", 2, "b") :: + Row(3, "C", 3, "c") :: + Row(4, "D", 4, "d") :: + Row(5, "E", null, null) :: + Row(6, "F", null, null) :: Nil) - // Make sure we are choosing right.outputPartitioning as the - // outputPartitioning for the outer join operator. - checkAnswer( - sql( - """ - |SELECT l.a, count(*) - |FROM allNulls l RIGHT OUTER JOIN upperCaseData r ON (l.a = r.N) - |GROUP BY l.a - """.stripMargin), - Row(null, - 6)) + checkAnswer( + upperCaseData.join(lowerCaseData, $"n" === $"N" && $"N" > 1, "left"), + Row(1, "A", null, null) :: + Row(2, "B", 2, "b") :: + Row(3, "C", 3, "c") :: + Row(4, "D", 4, "d") :: + Row(5, "E", null, null) :: + Row(6, "F", null, null) :: Nil) - checkAnswer( - sql( - """ - |SELECT r.N, count(*) - |FROM allNulls l RIGHT OUTER JOIN upperCaseData r ON (l.a = r.N) - |GROUP BY r.N - """.stripMargin), - Row(1 - , 1) :: + checkAnswer( + upperCaseData.join(lowerCaseData, $"n" === $"N" && $"l" > $"L", "left"), + Row(1, "A", 1, "a") :: + Row(2, "B", 2, "b") :: + Row(3, "C", 3, "c") :: + Row(4, "D", 4, "d") :: + Row(5, "E", null, null) :: + Row(6, "F", null, null) :: Nil) + + // Make sure we are choosing left.outputPartitioning as the + // outputPartitioning for the outer join operator. + checkAnswer( + sql( + """ + |SELECT l.N, count(*) + |FROM uppercasedata l LEFT OUTER JOIN allnulls r ON (l.N = r.a) + |GROUP BY l.N + """.stripMargin), + Row( + 1, 1) :: Row(2, 1) :: Row(3, 1) :: Row(4, 1) :: Row(5, 1) :: Row(6, 1) :: Nil) + + checkAnswer( + sql( + """ + |SELECT r.a, count(*) + |FROM uppercasedata l LEFT OUTER JOIN allnulls r ON (l.N = r.a) + |GROUP BY r.a + """.stripMargin), + Row(null, 6) :: Nil) + } + } + + test("right outer join") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + checkAnswer( + lowerCaseData.join(upperCaseData, $"n" === $"N", "right"), + Row(1, "a", 1, "A") :: + Row(2, "b", 2, "B") :: + Row(3, "c", 3, "C") :: + Row(4, "d", 4, "D") :: + Row(null, null, 5, "E") :: + Row(null, null, 6, "F") :: Nil) + checkAnswer( + lowerCaseData.join(upperCaseData, $"n" === $"N" && $"n" > 1, "right"), + Row(null, null, 1, "A") :: + Row(2, "b", 2, "B") :: + Row(3, "c", 3, "C") :: + Row(4, "d", 4, "D") :: + Row(null, null, 5, "E") :: + Row(null, null, 6, "F") :: Nil) + checkAnswer( + lowerCaseData.join(upperCaseData, $"n" === $"N" && $"N" > 1, "right"), + Row(null, null, 1, "A") :: + Row(2, "b", 2, "B") :: + Row(3, "c", 3, "C") :: + Row(4, "d", 4, "D") :: + Row(null, null, 5, "E") :: + Row(null, null, 6, "F") :: Nil) + checkAnswer( + lowerCaseData.join(upperCaseData, $"n" === $"N" && $"l" > $"L", "right"), + Row(1, "a", 1, "A") :: + Row(2, "b", 2, "B") :: + Row(3, "c", 3, "C") :: + Row(4, "d", 4, "D") :: + Row(null, null, 5, "E") :: + Row(null, null, 6, "F") :: Nil) + + // Make sure we are choosing right.outputPartitioning as the + // outputPartitioning for the outer join operator. + checkAnswer( + sql( + """ + |SELECT l.a, count(*) + |FROM allnulls l RIGHT OUTER JOIN uppercasedata r ON (l.a = r.N) + |GROUP BY l.a + """.stripMargin), + Row(null, + 6)) + + checkAnswer( + sql( + """ + |SELECT r.N, count(*) + |FROM allnulls l RIGHT OUTER JOIN uppercasedata r ON (l.a = r.N) + |GROUP BY r.N + """.stripMargin), + Row(1 + , 1) :: + Row(2, 1) :: + Row(3, 1) :: + Row(4, 1) :: + Row(5, 1) :: + Row(6, 1) :: Nil) + } } test("full outer join") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ListTablesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ListTablesSuite.scala index bb54c525cb..9f6c86a575 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ListTablesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ListTablesSuite.scala @@ -29,40 +29,40 @@ class ListTablesSuite extends QueryTest with BeforeAndAfter with SharedSQLContex private lazy val df = (1 to 10).map(i => (i, s"str$i")).toDF("key", "value") before { - df.registerTempTable("ListTablesSuiteTable") + df.registerTempTable("listtablessuitetable") } after { sqlContext.sessionState.catalog.dropTable( - TableIdentifier("ListTablesSuiteTable"), ignoreIfNotExists = true) + TableIdentifier("listtablessuitetable"), ignoreIfNotExists = true) } test("get all tables") { checkAnswer( - sqlContext.tables().filter("tableName = 'ListTablesSuiteTable'"), - Row("ListTablesSuiteTable", true)) + sqlContext.tables().filter("tableName = 'listtablessuitetable'"), + Row("listtablessuitetable", true)) checkAnswer( - sql("SHOW tables").filter("tableName = 'ListTablesSuiteTable'"), - Row("ListTablesSuiteTable", true)) + sql("SHOW tables").filter("tableName = 'listtablessuitetable'"), + Row("listtablessuitetable", true)) sqlContext.sessionState.catalog.dropTable( - TableIdentifier("ListTablesSuiteTable"), ignoreIfNotExists = true) - assert(sqlContext.tables().filter("tableName = 'ListTablesSuiteTable'").count() === 0) + TableIdentifier("listtablessuitetable"), ignoreIfNotExists = true) + assert(sqlContext.tables().filter("tableName = 'listtablessuitetable'").count() === 0) } test("getting all tables with a database name has no impact on returned table names") { checkAnswer( - sqlContext.tables("default").filter("tableName = 'ListTablesSuiteTable'"), - Row("ListTablesSuiteTable", true)) + sqlContext.tables("default").filter("tableName = 'listtablessuitetable'"), + Row("listtablessuitetable", true)) checkAnswer( - sql("show TABLES in default").filter("tableName = 'ListTablesSuiteTable'"), - Row("ListTablesSuiteTable", true)) + sql("show TABLES in default").filter("tableName = 'listtablessuitetable'"), + Row("listtablessuitetable", true)) sqlContext.sessionState.catalog.dropTable( - TableIdentifier("ListTablesSuiteTable"), ignoreIfNotExists = true) - assert(sqlContext.tables().filter("tableName = 'ListTablesSuiteTable'").count() === 0) + TableIdentifier("listtablessuitetable"), ignoreIfNotExists = true) + assert(sqlContext.tables().filter("tableName = 'listtablessuitetable'").count() === 0) } test("query the returned DataFrame of tables") { @@ -77,8 +77,8 @@ class ListTablesSuite extends QueryTest with BeforeAndAfter with SharedSQLContex tableDF.registerTempTable("tables") checkAnswer( sql( - "SELECT isTemporary, tableName from tables WHERE tableName = 'ListTablesSuiteTable'"), - Row(true, "ListTablesSuiteTable") + "SELECT isTemporary, tableName from tables WHERE tableName = 'listtablessuitetable'"), + Row(true, "listtablessuitetable") ) checkAnswer( sqlContext.tables().filter("tableName = 'tables'").select("tableName", "isTemporary"), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 29521afdd8..5065e5b80b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -754,36 +754,43 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { } test("inner join where, one match per row") { - checkAnswer( - sql("SELECT * FROM upperCaseData JOIN lowerCaseData WHERE n = N"), - Seq( - Row(1, "A", 1, "a"), - Row(2, "B", 2, "b"), - Row(3, "C", 3, "c"), - Row(4, "D", 4, "d"))) + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + checkAnswer( + sql("SELECT * FROM uppercasedata JOIN lowercasedata WHERE n = N"), + Seq( + Row(1, "A", 1, "a"), + Row(2, "B", 2, "b"), + Row(3, "C", 3, "c"), + Row(4, "D", 4, "d"))) + } } test("inner join ON, one match per row") { - checkAnswer( - sql("SELECT * FROM upperCaseData JOIN lowerCaseData ON n = N"), - Seq( - Row(1, "A", 1, "a"), - Row(2, "B", 2, "b"), - Row(3, "C", 3, "c"), - Row(4, "D", 4, "d"))) + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + checkAnswer( + sql("SELECT * FROM uppercasedata JOIN lowercasedata ON n = N"), + Seq( + Row(1, "A", 1, "a"), + Row(2, "B", 2, "b"), + Row(3, "C", 3, "c"), + Row(4, "D", 4, "d"))) + } } test("inner join, where, multiple matches") { - checkAnswer( - sql(""" - |SELECT * FROM - | (SELECT * FROM testData2 WHERE a = 1) x JOIN - | (SELECT * FROM testData2 WHERE a = 1) y - |WHERE x.a = y.a""".stripMargin), - Row(1, 1, 1, 1) :: - Row(1, 1, 1, 2) :: - Row(1, 2, 1, 1) :: - Row(1, 2, 1, 2) :: Nil) + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + checkAnswer( + sql( + """ + |SELECT * FROM + | (SELECT * FROM testdata2 WHERE a = 1) x JOIN + | (SELECT * FROM testdata2 WHERE a = 1) y + |WHERE x.a = y.a""".stripMargin), + Row(1, 1, 1, 1) :: + Row(1, 1, 1, 2) :: + Row(1, 2, 1, 1) :: + Row(1, 2, 1, 2) :: Nil) + } } test("inner join, no matches") { @@ -825,25 +832,29 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { } test("left outer join") { - checkAnswer( - sql("SELECT * FROM upperCaseData LEFT OUTER JOIN lowerCaseData ON n = N"), - Row(1, "A", 1, "a") :: - Row(2, "B", 2, "b") :: - Row(3, "C", 3, "c") :: - Row(4, "D", 4, "d") :: - Row(5, "E", null, null) :: - Row(6, "F", null, null) :: Nil) + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + checkAnswer( + sql("SELECT * FROM uppercasedata LEFT OUTER JOIN lowercasedata ON n = N"), + Row(1, "A", 1, "a") :: + Row(2, "B", 2, "b") :: + Row(3, "C", 3, "c") :: + Row(4, "D", 4, "d") :: + Row(5, "E", null, null) :: + Row(6, "F", null, null) :: Nil) + } } test("right outer join") { - checkAnswer( - sql("SELECT * FROM lowerCaseData RIGHT OUTER JOIN upperCaseData ON n = N"), - Row(1, "a", 1, "A") :: - Row(2, "b", 2, "B") :: - Row(3, "c", 3, "C") :: - Row(4, "d", 4, "D") :: - Row(null, null, 5, "E") :: - Row(null, null, 6, "F") :: Nil) + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + checkAnswer( + sql("SELECT * FROM lowercasedata RIGHT OUTER JOIN uppercasedata ON n = N"), + Row(1, "a", 1, "A") :: + Row(2, "b", 2, "B") :: + Row(3, "c", 3, "C") :: + Row(4, "d", 4, "D") :: + Row(null, null, 5, "E") :: + Row(null, null, 6, "F") :: Nil) + } } test("full outer join") { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala index 636bfdebf7..64b9d8424a 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala @@ -139,7 +139,6 @@ private[hive] class HiveSessionState(ctx: SQLContext) extends SessionState(ctx) */ def setDefaultOverrideConfs(): Unit = { setConf(ConfVars.HIVE_SUPPORT_SQL11_RESERVED_KEYWORDS.varname, "false") - conf.setConfString("spark.sql.caseSensitive", "false") } override def setConf(key: String, value: String): Unit = { -- cgit v1.2.3