about summary refs log tree commit diff
diff options
context:
space:
mode:
authorReynold Xin <rxin@databricks.com>2016-04-24 19:38:21 -0700
committerReynold Xin <rxin@databricks.com>2016-04-24 19:38:21 -0700
commitd0ca5797a8fc55a3046cdfad8860ba3a29f72b51 (patch)
tree6df48064d4aff7caa8707763b2b833a5ac6acfad
parent0c8e5332ff6a353a854a924ee7435708abfe0a52 (diff)
downloadspark-d0ca5797a8fc55a3046cdfad8860ba3a29f72b51.tar.gz
spark-d0ca5797a8fc55a3046cdfad8860ba3a29f72b51.tar.bz2
spark-d0ca5797a8fc55a3046cdfad8860ba3a29f72b51.zip
[SPARK-14876][SQL] SparkSession should be case insensitive by default
## What changes were proposed in this pull request?

This patch changes SparkSession to be case insensitive by default, in order to match other database systems.

## How was this patch tested?

N/A - I'm sure some tests will fail and I will need to fix those.

Author: Reynold Xin <rxin@databricks.com>

Closes #12643 from rxin/SPARK-14876.
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala4
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala270
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/ListTablesSuite.scala32
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala91
-rw-r--r--sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala1
5 files changed, 208 insertions, 190 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index acd85db7c0..bbc424cba6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -162,9 +162,9 @@ object SQLConf {
.createWithDefault(true)
val CASE_SENSITIVE = SQLConfigBuilder("spark.sql.caseSensitive")
- .doc("Whether the query analyzer should be case sensitive or not. Default to case sensitive.")
+ .doc("Whether the query analyzer should be case sensitive or not. Default to case insensitive.")
.booleanConf
- .createWithDefault(true)
+ .createWithDefault(false)
val PARQUET_SCHEMA_MERGING_ENABLED = SQLConfigBuilder("spark.sql.parquet.mergeSchema")
.doc("When true, the Parquet data source merges schemas collected from all data files, " +
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala
index 9e5a41d57c..ef9bb7ea4f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala
@@ -149,25 +149,29 @@ class JoinSuite extends QueryTest with SharedSQLContext {
}
test("inner join where, one match per row") {
- checkAnswer(
- upperCaseData.join(lowerCaseData).where('n === 'N),
- Seq(
- Row(1, "A", 1, "a"),
- Row(2, "B", 2, "b"),
- Row(3, "C", 3, "c"),
- Row(4, "D", 4, "d")
- ))
+ withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
+ checkAnswer(
+ upperCaseData.join(lowerCaseData).where('n === 'N),
+ Seq(
+ Row(1, "A", 1, "a"),
+ Row(2, "B", 2, "b"),
+ Row(3, "C", 3, "c"),
+ Row(4, "D", 4, "d")
+ ))
+ }
}
test("inner join ON, one match per row") {
- checkAnswer(
- upperCaseData.join(lowerCaseData, $"n" === $"N"),
- Seq(
- Row(1, "A", 1, "a"),
- Row(2, "B", 2, "b"),
- Row(3, "C", 3, "c"),
- Row(4, "D", 4, "d")
- ))
+ withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
+ checkAnswer(
+ upperCaseData.join(lowerCaseData, $"n" === $"N"),
+ Seq(
+ Row(1, "A", 1, "a"),
+ Row(2, "B", 2, "b"),
+ Row(3, "C", 3, "c"),
+ Row(4, "D", 4, "d")
+ ))
+ }
}
test("inner join, where, multiple matches") {
@@ -210,129 +214,133 @@ class JoinSuite extends QueryTest with SharedSQLContext {
}
test("left outer join") {
- checkAnswer(
- upperCaseData.join(lowerCaseData, $"n" === $"N", "left"),
- Row(1, "A", 1, "a") ::
- Row(2, "B", 2, "b") ::
- Row(3, "C", 3, "c") ::
- Row(4, "D", 4, "d") ::
- Row(5, "E", null, null) ::
- Row(6, "F", null, null) :: Nil)
-
- checkAnswer(
- upperCaseData.join(lowerCaseData, $"n" === $"N" && $"n" > 1, "left"),
- Row(1, "A", null, null) ::
- Row(2, "B", 2, "b") ::
- Row(3, "C", 3, "c") ::
- Row(4, "D", 4, "d") ::
- Row(5, "E", null, null) ::
- Row(6, "F", null, null) :: Nil)
-
- checkAnswer(
- upperCaseData.join(lowerCaseData, $"n" === $"N" && $"N" > 1, "left"),
- Row(1, "A", null, null) ::
- Row(2, "B", 2, "b") ::
- Row(3, "C", 3, "c") ::
- Row(4, "D", 4, "d") ::
- Row(5, "E", null, null) ::
- Row(6, "F", null, null) :: Nil)
-
- checkAnswer(
- upperCaseData.join(lowerCaseData, $"n" === $"N" && $"l" > $"L", "left"),
- Row(1, "A", 1, "a") ::
- Row(2, "B", 2, "b") ::
- Row(3, "C", 3, "c") ::
- Row(4, "D", 4, "d") ::
- Row(5, "E", null, null) ::
- Row(6, "F", null, null) :: Nil)
-
- // Make sure we are choosing left.outputPartitioning as the
- // outputPartitioning for the outer join operator.
- checkAnswer(
- sql(
- """
- |SELECT l.N, count(*)
- |FROM upperCaseData l LEFT OUTER JOIN allNulls r ON (l.N = r.a)
- |GROUP BY l.N
- """.
- stripMargin),
- Row(1, 1) ::
- Row(2, 1) ::
- Row(3, 1) ::
- Row(4, 1) ::
- Row(5, 1) ::
- Row(6, 1) :: Nil)
-
- checkAnswer(
- sql(
- """
- |SELECT r.a, count(*)
- |FROM upperCaseData l LEFT OUTER JOIN allNulls r ON (l.N = r.a)
- |GROUP BY r.a
- """.stripMargin),
- Row(null, 6) :: Nil)
- }
+ withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
+ checkAnswer(
+ upperCaseData.join(lowerCaseData, $"n" === $"N", "left"),
+ Row(1, "A", 1, "a") ::
+ Row(2, "B", 2, "b") ::
+ Row(3, "C", 3, "c") ::
+ Row(4, "D", 4, "d") ::
+ Row(5, "E", null, null) ::
+ Row(6, "F", null, null) :: Nil)
- test("right outer join") {
- checkAnswer(
- lowerCaseData.join(upperCaseData, $"n" === $"N", "right"),
- Row(1, "a", 1, "A") ::
- Row(2, "b", 2, "B") ::
- Row(3, "c", 3, "C") ::
- Row(4, "d", 4, "D") ::
- Row(null, null, 5, "E") ::
- Row(null, null, 6, "F") :: Nil)
- checkAnswer(
- lowerCaseData.join(upperCaseData, $"n" === $"N" && $"n" > 1, "right"),
- Row(null, null, 1, "A") ::
- Row(2, "b", 2, "B") ::
- Row(3, "c", 3, "C") ::
- Row(4, "d", 4, "D") ::
- Row(null, null, 5, "E") ::
- Row(null, null, 6, "F") :: Nil)
- checkAnswer(
- lowerCaseData.join(upperCaseData, $"n" === $"N" && $"N" > 1, "right"),
- Row(null, null, 1, "A") ::
- Row(2, "b", 2, "B") ::
- Row(3, "c", 3, "C") ::
- Row(4, "d", 4, "D") ::
- Row(null, null, 5, "E") ::
- Row(null, null, 6, "F") :: Nil)
- checkAnswer(
- lowerCaseData.join(upperCaseData, $"n" === $"N" && $"l" > $"L", "right"),
- Row(1, "a", 1, "A") ::
- Row(2, "b", 2, "B") ::
- Row(3, "c", 3, "C") ::
- Row(4, "d", 4, "D") ::
- Row(null, null, 5, "E") ::
- Row(null, null, 6, "F") :: Nil)
+ checkAnswer(
+ upperCaseData.join(lowerCaseData, $"n" === $"N" && $"n" > 1, "left"),
+ Row(1, "A", null, null) ::
+ Row(2, "B", 2, "b") ::
+ Row(3, "C", 3, "c") ::
+ Row(4, "D", 4, "d") ::
+ Row(5, "E", null, null) ::
+ Row(6, "F", null, null) :: Nil)
- // Make sure we are choosing right.outputPartitioning as the
- // outputPartitioning for the outer join operator.
- checkAnswer(
- sql(
- """
- |SELECT l.a, count(*)
- |FROM allNulls l RIGHT OUTER JOIN upperCaseData r ON (l.a = r.N)
- |GROUP BY l.a
- """.stripMargin),
- Row(null,
- 6))
+ checkAnswer(
+ upperCaseData.join(lowerCaseData, $"n" === $"N" && $"N" > 1, "left"),
+ Row(1, "A", null, null) ::
+ Row(2, "B", 2, "b") ::
+ Row(3, "C", 3, "c") ::
+ Row(4, "D", 4, "d") ::
+ Row(5, "E", null, null) ::
+ Row(6, "F", null, null) :: Nil)
- checkAnswer(
- sql(
- """
- |SELECT r.N, count(*)
- |FROM allNulls l RIGHT OUTER JOIN upperCaseData r ON (l.a = r.N)
- |GROUP BY r.N
- """.stripMargin),
- Row(1
- , 1) ::
+ checkAnswer(
+ upperCaseData.join(lowerCaseData, $"n" === $"N" && $"l" > $"L", "left"),
+ Row(1, "A", 1, "a") ::
+ Row(2, "B", 2, "b") ::
+ Row(3, "C", 3, "c") ::
+ Row(4, "D", 4, "d") ::
+ Row(5, "E", null, null) ::
+ Row(6, "F", null, null) :: Nil)
+
+ // Make sure we are choosing left.outputPartitioning as the
+ // outputPartitioning for the outer join operator.
+ checkAnswer(
+ sql(
+ """
+ |SELECT l.N, count(*)
+ |FROM uppercasedata l LEFT OUTER JOIN allnulls r ON (l.N = r.a)
+ |GROUP BY l.N
+ """.stripMargin),
+ Row(
+ 1, 1) ::
Row(2, 1) ::
Row(3, 1) ::
Row(4, 1) ::
Row(5, 1) ::
Row(6, 1) :: Nil)
+
+ checkAnswer(
+ sql(
+ """
+ |SELECT r.a, count(*)
+ |FROM uppercasedata l LEFT OUTER JOIN allnulls r ON (l.N = r.a)
+ |GROUP BY r.a
+ """.stripMargin),
+ Row(null, 6) :: Nil)
+ }
+ }
+
+ test("right outer join") {
+ withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
+ checkAnswer(
+ lowerCaseData.join(upperCaseData, $"n" === $"N", "right"),
+ Row(1, "a", 1, "A") ::
+ Row(2, "b", 2, "B") ::
+ Row(3, "c", 3, "C") ::
+ Row(4, "d", 4, "D") ::
+ Row(null, null, 5, "E") ::
+ Row(null, null, 6, "F") :: Nil)
+ checkAnswer(
+ lowerCaseData.join(upperCaseData, $"n" === $"N" && $"n" > 1, "right"),
+ Row(null, null, 1, "A") ::
+ Row(2, "b", 2, "B") ::
+ Row(3, "c", 3, "C") ::
+ Row(4, "d", 4, "D") ::
+ Row(null, null, 5, "E") ::
+ Row(null, null, 6, "F") :: Nil)
+ checkAnswer(
+ lowerCaseData.join(upperCaseData, $"n" === $"N" && $"N" > 1, "right"),
+ Row(null, null, 1, "A") ::
+ Row(2, "b", 2, "B") ::
+ Row(3, "c", 3, "C") ::
+ Row(4, "d", 4, "D") ::
+ Row(null, null, 5, "E") ::
+ Row(null, null, 6, "F") :: Nil)
+ checkAnswer(
+ lowerCaseData.join(upperCaseData, $"n" === $"N" && $"l" > $"L", "right"),
+ Row(1, "a", 1, "A") ::
+ Row(2, "b", 2, "B") ::
+ Row(3, "c", 3, "C") ::
+ Row(4, "d", 4, "D") ::
+ Row(null, null, 5, "E") ::
+ Row(null, null, 6, "F") :: Nil)
+
+ // Make sure we are choosing right.outputPartitioning as the
+ // outputPartitioning for the outer join operator.
+ checkAnswer(
+ sql(
+ """
+ |SELECT l.a, count(*)
+ |FROM allnulls l RIGHT OUTER JOIN uppercasedata r ON (l.a = r.N)
+ |GROUP BY l.a
+ """.stripMargin),
+ Row(null,
+ 6))
+
+ checkAnswer(
+ sql(
+ """
+ |SELECT r.N, count(*)
+ |FROM allnulls l RIGHT OUTER JOIN uppercasedata r ON (l.a = r.N)
+ |GROUP BY r.N
+ """.stripMargin),
+ Row(1
+ , 1) ::
+ Row(2, 1) ::
+ Row(3, 1) ::
+ Row(4, 1) ::
+ Row(5, 1) ::
+ Row(6, 1) :: Nil)
+ }
}
test("full outer join") {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ListTablesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ListTablesSuite.scala
index bb54c525cb..9f6c86a575 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ListTablesSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ListTablesSuite.scala
@@ -29,40 +29,40 @@ class ListTablesSuite extends QueryTest with BeforeAndAfter with SharedSQLContex
private lazy val df = (1 to 10).map(i => (i, s"str$i")).toDF("key", "value")
before {
- df.registerTempTable("ListTablesSuiteTable")
+ df.registerTempTable("listtablessuitetable")
}
after {
sqlContext.sessionState.catalog.dropTable(
- TableIdentifier("ListTablesSuiteTable"), ignoreIfNotExists = true)
+ TableIdentifier("listtablessuitetable"), ignoreIfNotExists = true)
}
test("get all tables") {
checkAnswer(
- sqlContext.tables().filter("tableName = 'ListTablesSuiteTable'"),
- Row("ListTablesSuiteTable", true))
+ sqlContext.tables().filter("tableName = 'listtablessuitetable'"),
+ Row("listtablessuitetable", true))
checkAnswer(
- sql("SHOW tables").filter("tableName = 'ListTablesSuiteTable'"),
- Row("ListTablesSuiteTable", true))
+ sql("SHOW tables").filter("tableName = 'listtablessuitetable'"),
+ Row("listtablessuitetable", true))
sqlContext.sessionState.catalog.dropTable(
- TableIdentifier("ListTablesSuiteTable"), ignoreIfNotExists = true)
- assert(sqlContext.tables().filter("tableName = 'ListTablesSuiteTable'").count() === 0)
+ TableIdentifier("listtablessuitetable"), ignoreIfNotExists = true)
+ assert(sqlContext.tables().filter("tableName = 'listtablessuitetable'").count() === 0)
}
test("getting all tables with a database name has no impact on returned table names") {
checkAnswer(
- sqlContext.tables("default").filter("tableName = 'ListTablesSuiteTable'"),
- Row("ListTablesSuiteTable", true))
+ sqlContext.tables("default").filter("tableName = 'listtablessuitetable'"),
+ Row("listtablessuitetable", true))
checkAnswer(
- sql("show TABLES in default").filter("tableName = 'ListTablesSuiteTable'"),
- Row("ListTablesSuiteTable", true))
+ sql("show TABLES in default").filter("tableName = 'listtablessuitetable'"),
+ Row("listtablessuitetable", true))
sqlContext.sessionState.catalog.dropTable(
- TableIdentifier("ListTablesSuiteTable"), ignoreIfNotExists = true)
- assert(sqlContext.tables().filter("tableName = 'ListTablesSuiteTable'").count() === 0)
+ TableIdentifier("listtablessuitetable"), ignoreIfNotExists = true)
+ assert(sqlContext.tables().filter("tableName = 'listtablessuitetable'").count() === 0)
}
test("query the returned DataFrame of tables") {
@@ -77,8 +77,8 @@ class ListTablesSuite extends QueryTest with BeforeAndAfter with SharedSQLContex
tableDF.registerTempTable("tables")
checkAnswer(
sql(
- "SELECT isTemporary, tableName from tables WHERE tableName = 'ListTablesSuiteTable'"),
- Row(true, "ListTablesSuiteTable")
+ "SELECT isTemporary, tableName from tables WHERE tableName = 'listtablessuitetable'"),
+ Row(true, "listtablessuitetable")
)
checkAnswer(
sqlContext.tables().filter("tableName = 'tables'").select("tableName", "isTemporary"),
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 29521afdd8..5065e5b80b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -754,36 +754,43 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
}
test("inner join where, one match per row") {
- checkAnswer(
- sql("SELECT * FROM upperCaseData JOIN lowerCaseData WHERE n = N"),
- Seq(
- Row(1, "A", 1, "a"),
- Row(2, "B", 2, "b"),
- Row(3, "C", 3, "c"),
- Row(4, "D", 4, "d")))
+ withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
+ checkAnswer(
+ sql("SELECT * FROM uppercasedata JOIN lowercasedata WHERE n = N"),
+ Seq(
+ Row(1, "A", 1, "a"),
+ Row(2, "B", 2, "b"),
+ Row(3, "C", 3, "c"),
+ Row(4, "D", 4, "d")))
+ }
}
test("inner join ON, one match per row") {
- checkAnswer(
- sql("SELECT * FROM upperCaseData JOIN lowerCaseData ON n = N"),
- Seq(
- Row(1, "A", 1, "a"),
- Row(2, "B", 2, "b"),
- Row(3, "C", 3, "c"),
- Row(4, "D", 4, "d")))
+ withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
+ checkAnswer(
+ sql("SELECT * FROM uppercasedata JOIN lowercasedata ON n = N"),
+ Seq(
+ Row(1, "A", 1, "a"),
+ Row(2, "B", 2, "b"),
+ Row(3, "C", 3, "c"),
+ Row(4, "D", 4, "d")))
+ }
}
test("inner join, where, multiple matches") {
- checkAnswer(
- sql("""
- |SELECT * FROM
- | (SELECT * FROM testData2 WHERE a = 1) x JOIN
- | (SELECT * FROM testData2 WHERE a = 1) y
- |WHERE x.a = y.a""".stripMargin),
- Row(1, 1, 1, 1) ::
- Row(1, 1, 1, 2) ::
- Row(1, 2, 1, 1) ::
- Row(1, 2, 1, 2) :: Nil)
+ withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
+ checkAnswer(
+ sql(
+ """
+ |SELECT * FROM
+ | (SELECT * FROM testdata2 WHERE a = 1) x JOIN
+ | (SELECT * FROM testdata2 WHERE a = 1) y
+ |WHERE x.a = y.a""".stripMargin),
+ Row(1, 1, 1, 1) ::
+ Row(1, 1, 1, 2) ::
+ Row(1, 2, 1, 1) ::
+ Row(1, 2, 1, 2) :: Nil)
+ }
}
test("inner join, no matches") {
@@ -825,25 +832,29 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
}
test("left outer join") {
- checkAnswer(
- sql("SELECT * FROM upperCaseData LEFT OUTER JOIN lowerCaseData ON n = N"),
- Row(1, "A", 1, "a") ::
- Row(2, "B", 2, "b") ::
- Row(3, "C", 3, "c") ::
- Row(4, "D", 4, "d") ::
- Row(5, "E", null, null) ::
- Row(6, "F", null, null) :: Nil)
+ withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
+ checkAnswer(
+ sql("SELECT * FROM uppercasedata LEFT OUTER JOIN lowercasedata ON n = N"),
+ Row(1, "A", 1, "a") ::
+ Row(2, "B", 2, "b") ::
+ Row(3, "C", 3, "c") ::
+ Row(4, "D", 4, "d") ::
+ Row(5, "E", null, null) ::
+ Row(6, "F", null, null) :: Nil)
+ }
}
test("right outer join") {
- checkAnswer(
- sql("SELECT * FROM lowerCaseData RIGHT OUTER JOIN upperCaseData ON n = N"),
- Row(1, "a", 1, "A") ::
- Row(2, "b", 2, "B") ::
- Row(3, "c", 3, "C") ::
- Row(4, "d", 4, "D") ::
- Row(null, null, 5, "E") ::
- Row(null, null, 6, "F") :: Nil)
+ withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
+ checkAnswer(
+ sql("SELECT * FROM lowercasedata RIGHT OUTER JOIN uppercasedata ON n = N"),
+ Row(1, "a", 1, "A") ::
+ Row(2, "b", 2, "B") ::
+ Row(3, "c", 3, "C") ::
+ Row(4, "d", 4, "D") ::
+ Row(null, null, 5, "E") ::
+ Row(null, null, 6, "F") :: Nil)
+ }
}
test("full outer join") {
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
index 636bfdebf7..64b9d8424a 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
@@ -139,7 +139,6 @@ private[hive] class HiveSessionState(ctx: SQLContext) extends SessionState(ctx)
*/
def setDefaultOverrideConfs(): Unit = {
setConf(ConfVars.HIVE_SUPPORT_SQL11_RESERVED_KEYWORDS.varname, "false")
- conf.setConfString("spark.sql.caseSensitive", "false")
}
override def setConf(key: String, value: String): Unit = {