about summary refs log tree commit diff
diff options
context:
space:
mode:
authorXiao Li <gatorsmile@gmail.com>2017-04-20 11:13:48 +0100
committerSean Owen <sowen@cloudera.com>2017-04-20 11:13:48 +0100
commit55bea56911a958f6d3ec3ad96fb425cc71ec03f4 (patch)
tree08e8eb3d8f9fe1b473a90cea5bacee5f77e5ffa0
parent46c5749768fefd976097c7d5612ec184a4cfe1b9 (diff)
downloadspark-55bea56911a958f6d3ec3ad96fb425cc71ec03f4.tar.gz
spark-55bea56911a958f6d3ec3ad96fb425cc71ec03f4.tar.bz2
spark-55bea56911a958f6d3ec3ad96fb425cc71ec03f4.zip
[SPARK-20156][SQL][FOLLOW-UP] Java String toLowerCase "Turkish locale bug" in Database and Table DDLs
### What changes were proposed in this pull request? Database and table names conform to the Hive standard ("[a-zA-Z_0-9]+"), i.e. a name may only contain letters, digits, and underscores. When calling `toLowerCase` on the names, we should pass `Locale.ROOT` to `toLowerCase` to avoid inadvertent locale-sensitive variation in behavior (aka the "Turkish locale problem"). ### How was this patch tested? Added a test case Author: Xiao Li <gatorsmile@gmail.com> Closes #17655 from gatorsmile/locale.
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala4
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala4
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala4
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala19
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala28
5 files changed, 54 insertions, 5 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala
index 8841309939..de6de24350 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala
@@ -17,6 +17,8 @@
package org.apache.spark.sql.catalyst.analysis
+import java.util.Locale
+
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Range}
import org.apache.spark.sql.catalyst.rules._
@@ -103,7 +105,7 @@ object ResolveTableValuedFunctions extends Rule[LogicalPlan] {
override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators {
case u: UnresolvedTableValuedFunction if u.functionArgs.forall(_.resolved) =>
- builtinFunctions.get(u.functionName.toLowerCase()) match {
+ builtinFunctions.get(u.functionName.toLowerCase(Locale.ROOT)) match {
case Some(tvf) =>
val resolved = tvf.flatMap { case (argList, resolver) =>
argList.implicitCast(u.functionArgs) match {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index 3fbf83f3a3..6c6d600190 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -115,14 +115,14 @@ class SessionCatalog(
* Format table name, taking into account case sensitivity.
*/
protected[this] def formatTableName(name: String): String = {
- if (conf.caseSensitiveAnalysis) name else name.toLowerCase
+ if (conf.caseSensitiveAnalysis) name else name.toLowerCase(Locale.ROOT)
}
/**
* Format database name, taking into account case sensitivity.
*/
protected[this] def formatDatabaseName(name: String): String = {
- if (conf.caseSensitiveAnalysis) name else name.toLowerCase
+ if (conf.caseSensitiveAnalysis) name else name.toLowerCase(Locale.ROOT)
}
/**
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
index 0289471bf8..d06dbaa2d0 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
@@ -17,6 +17,8 @@
package org.apache.spark.sql.internal
+import java.util.Locale
+
import scala.reflect.ClassTag
import scala.util.control.NonFatal
@@ -114,7 +116,7 @@ private[sql] class SharedState(val sparkContext: SparkContext) extends Logging {
// System preserved database should not exists in metastore. However it's hard to guarantee it
// for every session, because case-sensitivity differs. Here we always lowercase it to make our
// life easier.
- val globalTempDB = sparkContext.conf.get(GLOBAL_TEMP_DATABASE).toLowerCase
+ val globalTempDB = sparkContext.conf.get(GLOBAL_TEMP_DATABASE).toLowerCase(Locale.ROOT)
if (externalCatalog.databaseExists(globalTempDB)) {
throw new SparkException(
s"$globalTempDB is a system preserved database, please rename your existing database " +
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index fe74ab49f9..2f4eb1b155 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -2295,5 +2295,24 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
}
}
}
+
+ test(s"basic DDL using locale tr - caseSensitive $caseSensitive") {
+ withSQLConf(SQLConf.CASE_SENSITIVE.key -> s"$caseSensitive") {
+ withLocale("tr") {
+ val dbName = "DaTaBaSe_I"
+ withDatabase(dbName) {
+ sql(s"CREATE DATABASE $dbName")
+ sql(s"USE $dbName")
+
+ val tabName = "tAb_I"
+ withTable(tabName) {
+ sql(s"CREATE TABLE $tabName(col_I int) USING PARQUET")
+ sql(s"INSERT OVERWRITE TABLE $tabName SELECT 1")
+ checkAnswer(sql(s"SELECT col_I FROM $tabName"), Row(1) :: Nil)
+ }
+ }
+ }
+ }
+ }
}
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
index 6a4cc95d36..b5ad73b746 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
@@ -20,7 +20,7 @@ package org.apache.spark.sql.test
import java.io.File
import java.net.URI
import java.nio.file.Files
-import java.util.UUID
+import java.util.{Locale, UUID}
import scala.language.implicitConversions
import scala.util.control.NonFatal
@@ -229,6 +229,32 @@ private[sql] trait SQLTestUtils
}
/**
+ * Drops the databases `dbNames` after calling `f`.
+ */
+ protected def withDatabase(dbNames: String*)(f: => Unit): Unit = {
+ try f finally {
+ dbNames.foreach { name =>
+ spark.sql(s"DROP DATABASE IF EXISTS $name")
+ }
+ }
+ }
+
+ /**
+ * Sets the default locale to `language` before executing `f`, then switches back to the JVM's default locale
+ * after `f` returns.
+ */
+ protected def withLocale(language: String)(f: => Unit): Unit = {
+ val originalLocale = Locale.getDefault
+ try {
+ // Add Locale setting
+ Locale.setDefault(new Locale(language))
+ f
+ } finally {
+ Locale.setDefault(originalLocale)
+ }
+ }
+
+ /**
* Activates database `db` before executing `f`, then switches back to `default` database after
* `f` returns.
*/