author    Sean Owen <sowen@cloudera.com>    2017-04-10 20:11:56 +0100
committer Sean Owen <sowen@cloudera.com>    2017-04-10 20:11:56 +0100
commit    a26e3ed5e414d0a350cfe65dd511b154868b9f1d (patch)
tree      f8bf8feabae7acdd5b2c29e38273fddb80e3de33 /sql/core
parent    fd711ea13e558f0e7d3e01f08e01444d394499a6 (diff)
[SPARK-20156][CORE][SQL][STREAMING][MLLIB] Java String toLowerCase "Turkish locale bug" causes Spark problems
## What changes were proposed in this pull request?

Add Locale.ROOT to internal calls to String `toLowerCase`, `toUpperCase`, to avoid inadvertent locale-sensitive variation in behavior (aka the "Turkish locale problem"). The change looks large but it is just adding `Locale.ROOT` (the locale with no country or language specified) to every call to these methods.

## How was this patch tested?

Existing tests.

Author: Sean Owen <sowen@cloudera.com>

Closes #17527 from srowen/SPARK-20156.
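For context, a minimal illustrative sketch (not part of this patch) of the "Turkish locale problem" the change guards against: under a Turkish default locale, the locale-sensitive `toLowerCase` maps `I` to the dotless `ı`, so internal comparisons such as matching a provider name against `"hive"` silently fail, while `toLowerCase(Locale.ROOT)` stays locale-neutral.

```scala
import java.util.Locale

object TurkishLocaleDemo {
  def main(args: Array[String]): Unit = {
    // Simulate a JVM whose default locale is Turkish.
    Locale.setDefault(new Locale("tr", "TR"))

    // Locale-sensitive lowercasing turns 'I' into U+0131 (dotless ı),
    // so "HIVE" no longer lowercases to "hive".
    println("HIVE".toLowerCase)                // prints: hıve
    println("HIVE".toLowerCase == "hive")      // prints: false

    // Pinning the conversion to Locale.ROOT gives the locale-neutral result,
    // which is the pattern this commit applies throughout Spark.
    println("HIVE".toLowerCase(Locale.ROOT))            // prints: hive
    println("HIVE".toLowerCase(Locale.ROOT) == "hive")  // prints: true
  }
}
```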
Diffstat (limited to 'sql/core')
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala | 3
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala | 4
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala | 6
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala | 4
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala | 24
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala | 20
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala | 6
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala | 6
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala | 4
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala | 16
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala | 1
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala | 4
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala | 4
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ddl.scala | 4
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala | 6
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala | 3
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala | 8
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala | 5
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala | 3
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala | 4
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala | 1
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala | 4
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala | 4
-rw-r--r-- sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java | 2
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala | 3
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala | 13
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala | 7
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala | 5
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala | 4
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala | 8
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/sources/FilteredScanSuite.scala | 9
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala | 4
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala | 4
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala | 7
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala | 10
35 files changed, 134 insertions, 86 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala
index d8f953fba5..93d565d9fe 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala
@@ -18,6 +18,7 @@
package org.apache.spark.sql
import java.{lang => jl}
+import java.util.Locale
import scala.collection.JavaConverters._
@@ -89,7 +90,7 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) {
* @since 1.3.1
*/
def drop(how: String, cols: Seq[String]): DataFrame = {
- how.toLowerCase match {
+ how.toLowerCase(Locale.ROOT) match {
case "any" => drop(cols.size, cols)
case "all" => drop(1, cols)
case _ => throw new IllegalArgumentException(s"how ($how) must be 'any' or 'all'")
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
index 2b8537c3d4..49691c15d0 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
@@ -17,7 +17,7 @@
package org.apache.spark.sql
-import java.util.Properties
+import java.util.{Locale, Properties}
import scala.collection.JavaConverters._
@@ -164,7 +164,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
*/
@scala.annotation.varargs
def load(paths: String*): DataFrame = {
- if (source.toLowerCase == DDLUtils.HIVE_PROVIDER) {
+ if (source.toLowerCase(Locale.ROOT) == DDLUtils.HIVE_PROVIDER) {
throw new AnalysisException("Hive data source can only be used with tables, you can not " +
"read files of Hive data source directly.")
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
index 338a6e1314..1732a8e08b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
@@ -17,7 +17,7 @@
package org.apache.spark.sql
-import java.util.Properties
+import java.util.{Locale, Properties}
import scala.collection.JavaConverters._
@@ -66,7 +66,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
* @since 1.4.0
*/
def mode(saveMode: String): DataFrameWriter[T] = {
- this.mode = saveMode.toLowerCase match {
+ this.mode = saveMode.toLowerCase(Locale.ROOT) match {
case "overwrite" => SaveMode.Overwrite
case "append" => SaveMode.Append
case "ignore" => SaveMode.Ignore
@@ -223,7 +223,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
* @since 1.4.0
*/
def save(): Unit = {
- if (source.toLowerCase == DDLUtils.HIVE_PROVIDER) {
+ if (source.toLowerCase(Locale.ROOT) == DDLUtils.HIVE_PROVIDER) {
throw new AnalysisException("Hive data source can only be used with tables, you can not " +
"write files of Hive data source directly.")
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala
index 0fe8d87ebd..6475543478 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala
@@ -17,6 +17,8 @@
package org.apache.spark.sql
+import java.util.Locale
+
import scala.collection.JavaConverters._
import scala.language.implicitConversions
@@ -108,7 +110,7 @@ class RelationalGroupedDataset protected[sql](
private[this] def strToExpr(expr: String): (Expression => Expression) = {
val exprToFunc: (Expression => Expression) = {
- (inputExpr: Expression) => expr.toLowerCase match {
+ (inputExpr: Expression) => expr.toLowerCase(Locale.ROOT) match {
// We special handle a few cases that have alias that are not in function registry.
case "avg" | "average" | "mean" =>
UnresolvedFunction("avg", inputExpr :: Nil, isDistinct = false)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala
index c77328690d..a26d00411f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala
@@ -18,7 +18,7 @@
package org.apache.spark.sql.api.r
import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}
-import java.util.{Map => JMap}
+import java.util.{Locale, Map => JMap}
import scala.collection.JavaConverters._
import scala.util.matching.Regex
@@ -47,17 +47,19 @@ private[sql] object SQLUtils extends Logging {
jsc: JavaSparkContext,
sparkConfigMap: JMap[Object, Object],
enableHiveSupport: Boolean): SparkSession = {
- val spark = if (SparkSession.hiveClassesArePresent && enableHiveSupport
- && jsc.sc.conf.get(CATALOG_IMPLEMENTATION.key, "hive").toLowerCase == "hive") {
- SparkSession.builder().sparkContext(withHiveExternalCatalog(jsc.sc)).getOrCreate()
- } else {
- if (enableHiveSupport) {
- logWarning("SparkR: enableHiveSupport is requested for SparkSession but " +
- s"Spark is not built with Hive or ${CATALOG_IMPLEMENTATION.key} is not set to 'hive', " +
- "falling back to without Hive support.")
+ val spark =
+ if (SparkSession.hiveClassesArePresent && enableHiveSupport &&
+ jsc.sc.conf.get(CATALOG_IMPLEMENTATION.key, "hive").toLowerCase(Locale.ROOT) ==
+ "hive") {
+ SparkSession.builder().sparkContext(withHiveExternalCatalog(jsc.sc)).getOrCreate()
+ } else {
+ if (enableHiveSupport) {
+ logWarning("SparkR: enableHiveSupport is requested for SparkSession but " +
+ s"Spark is not built with Hive or ${CATALOG_IMPLEMENTATION.key} is not set to " +
+ "'hive', falling back to without Hive support.")
+ }
+ SparkSession.builder().sparkContext(jsc.sc).getOrCreate()
}
- SparkSession.builder().sparkContext(jsc.sc).getOrCreate()
- }
setSparkContextSessionConf(spark, sparkConfigMap)
spark
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index 80afb59b3e..20dacf8850 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -17,6 +17,8 @@
package org.apache.spark.sql.execution
+import java.util.Locale
+
import scala.collection.JavaConverters._
import org.antlr.v4.runtime.{ParserRuleContext, Token}
@@ -103,7 +105,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
logWarning(s"Partition specification is ignored: ${ctx.partitionSpec.getText}")
}
if (ctx.identifier != null) {
- if (ctx.identifier.getText.toLowerCase != "noscan") {
+ if (ctx.identifier.getText.toLowerCase(Locale.ROOT) != "noscan") {
throw new ParseException(s"Expected `NOSCAN` instead of `${ctx.identifier.getText}`", ctx)
}
AnalyzeTableCommand(visitTableIdentifier(ctx.tableIdentifier))
@@ -563,7 +565,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
} else if (value.STRING != null) {
string(value.STRING)
} else if (value.booleanValue != null) {
- value.getText.toLowerCase
+ value.getText.toLowerCase(Locale.ROOT)
} else {
value.getText
}
@@ -647,7 +649,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
*/
override def visitShowFunctions(ctx: ShowFunctionsContext): LogicalPlan = withOrigin(ctx) {
import ctx._
- val (user, system) = Option(ctx.identifier).map(_.getText.toLowerCase) match {
+ val (user, system) = Option(ctx.identifier).map(_.getText.toLowerCase(Locale.ROOT)) match {
case None | Some("all") => (true, true)
case Some("system") => (false, true)
case Some("user") => (true, false)
@@ -677,7 +679,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
*/
override def visitCreateFunction(ctx: CreateFunctionContext): LogicalPlan = withOrigin(ctx) {
val resources = ctx.resource.asScala.map { resource =>
- val resourceType = resource.identifier.getText.toLowerCase
+ val resourceType = resource.identifier.getText.toLowerCase(Locale.ROOT)
resourceType match {
case "jar" | "file" | "archive" =>
FunctionResource(FunctionResourceType.fromString(resourceType), string(resource.STRING))
@@ -959,7 +961,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
.flatMap(_.orderedIdentifier.asScala)
.map { orderedIdCtx =>
Option(orderedIdCtx.ordering).map(_.getText).foreach { dir =>
- if (dir.toLowerCase != "asc") {
+ if (dir.toLowerCase(Locale.ROOT) != "asc") {
operationNotAllowed(s"Column ordering must be ASC, was '$dir'", ctx)
}
}
@@ -1012,13 +1014,13 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
val mayebePaths = remainder(ctx.identifier).trim
ctx.op.getType match {
case SqlBaseParser.ADD =>
- ctx.identifier.getText.toLowerCase match {
+ ctx.identifier.getText.toLowerCase(Locale.ROOT) match {
case "file" => AddFileCommand(mayebePaths)
case "jar" => AddJarCommand(mayebePaths)
case other => operationNotAllowed(s"ADD with resource type '$other'", ctx)
}
case SqlBaseParser.LIST =>
- ctx.identifier.getText.toLowerCase match {
+ ctx.identifier.getText.toLowerCase(Locale.ROOT) match {
case "files" | "file" =>
if (mayebePaths.length > 0) {
ListFilesCommand(mayebePaths.split("\\s+"))
@@ -1305,7 +1307,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
(rowFormatCtx, createFileFormatCtx.fileFormat) match {
case (_, ffTable: TableFileFormatContext) => // OK
case (rfSerde: RowFormatSerdeContext, ffGeneric: GenericFileFormatContext) =>
- ffGeneric.identifier.getText.toLowerCase match {
+ ffGeneric.identifier.getText.toLowerCase(Locale.ROOT) match {
case ("sequencefile" | "textfile" | "rcfile") => // OK
case fmt =>
operationNotAllowed(
@@ -1313,7 +1315,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
parentCtx)
}
case (rfDelimited: RowFormatDelimitedContext, ffGeneric: GenericFileFormatContext) =>
- ffGeneric.identifier.getText.toLowerCase match {
+ ffGeneric.identifier.getText.toLowerCase(Locale.ROOT) match {
case "textfile" => // OK
case fmt => operationNotAllowed(
s"ROW FORMAT DELIMITED is only compatible with 'textfile', not '$fmt'", parentCtx)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
index c31fd92447..c1e1a631c6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
@@ -17,7 +17,9 @@
package org.apache.spark.sql.execution
-import org.apache.spark.{broadcast, TaskContext}
+import java.util.Locale
+
+import org.apache.spark.broadcast
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
@@ -43,7 +45,7 @@ trait CodegenSupport extends SparkPlan {
case _: SortMergeJoinExec => "smj"
case _: RDDScanExec => "rdd"
case _: DataSourceScanExec => "scan"
- case _ => nodeName.toLowerCase
+ case _ => nodeName.toLowerCase(Locale.ROOT)
}
/**
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
index 9d3c55060d..55540563ef 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
@@ -17,6 +17,8 @@
package org.apache.spark.sql.execution.command
+import java.util.Locale
+
import scala.collection.{GenMap, GenSeq}
import scala.collection.parallel.ForkJoinTaskSupport
import scala.concurrent.forkjoin.ForkJoinPool
@@ -764,11 +766,11 @@ object DDLUtils {
val HIVE_PROVIDER = "hive"
def isHiveTable(table: CatalogTable): Boolean = {
- table.provider.isDefined && table.provider.get.toLowerCase == HIVE_PROVIDER
+ table.provider.isDefined && table.provider.get.toLowerCase(Locale.ROOT) == HIVE_PROVIDER
}
def isDatasourceTable(table: CatalogTable): Boolean = {
- table.provider.isDefined && table.provider.get.toLowerCase != HIVE_PROVIDER
+ table.provider.isDefined && table.provider.get.toLowerCase(Locale.ROOT) != HIVE_PROVIDER
}
/**
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala
index ea5398761c..5687f93324 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala
@@ -17,6 +17,8 @@
package org.apache.spark.sql.execution.command
+import java.util.Locale
+
import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
import org.apache.spark.sql.catalyst.FunctionIdentifier
import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, NoSuchFunctionException}
@@ -100,7 +102,7 @@ case class DescribeFunctionCommand(
override def run(sparkSession: SparkSession): Seq[Row] = {
// Hard code "<>", "!=", "between", and "case" for now as there is no corresponding functions.
- functionName.funcName.toLowerCase match {
+ functionName.funcName.toLowerCase(Locale.ROOT) match {
case "<>" =>
Row(s"Function: $functionName") ::
Row("Usage: expr1 <> expr2 - " +
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
index c9384e4425..f3b209deaa 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
@@ -17,12 +17,11 @@
package org.apache.spark.sql.execution.datasources
-import java.util.{ServiceConfigurationError, ServiceLoader}
+import java.util.{Locale, ServiceConfigurationError, ServiceLoader}
import scala.collection.JavaConverters._
import scala.language.{existentials, implicitConversions}
import scala.util.{Failure, Success, Try}
-import scala.util.control.NonFatal
import org.apache.hadoop.fs.Path
@@ -539,15 +538,16 @@ object DataSource {
// Found the data source using fully qualified path
dataSource
case Failure(error) =>
- if (provider1.toLowerCase == "orc" ||
+ if (provider1.toLowerCase(Locale.ROOT) == "orc" ||
provider1.startsWith("org.apache.spark.sql.hive.orc")) {
throw new AnalysisException(
"The ORC data source must be used with Hive support enabled")
- } else if (provider1.toLowerCase == "avro" ||
+ } else if (provider1.toLowerCase(Locale.ROOT) == "avro" ||
provider1 == "com.databricks.spark.avro") {
throw new AnalysisException(
- s"Failed to find data source: ${provider1.toLowerCase}. Please find an Avro " +
- "package at http://spark.apache.org/third-party-projects.html")
+ s"Failed to find data source: ${provider1.toLowerCase(Locale.ROOT)}. " +
+ "Please find an Avro package at " +
+ "http://spark.apache.org/third-party-projects.html")
} else {
throw new ClassNotFoundException(
s"Failed to find data source: $provider1. Please find packages at " +
@@ -596,8 +596,8 @@ object DataSource {
*/
def buildStorageFormatFromOptions(options: Map[String, String]): CatalogStorageFormat = {
val path = CaseInsensitiveMap(options).get("path")
- val optionsWithoutPath = options.filterKeys(_.toLowerCase != "path")
+ val optionsWithoutPath = options.filterKeys(_.toLowerCase(Locale.ROOT) != "path")
CatalogStorageFormat.empty.copy(
- locationUri = path.map(CatalogUtils.stringToURI(_)), properties = optionsWithoutPath)
+ locationUri = path.map(CatalogUtils.stringToURI), properties = optionsWithoutPath)
}
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala
index 11605dd280..9897ab73b0 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala
@@ -245,7 +245,6 @@ object InMemoryFileIndex extends Logging {
sessionOpt: Option[SparkSession]): Seq[FileStatus] = {
logTrace(s"Listing $path")
val fs = path.getFileSystem(hadoopConf)
- val name = path.getName.toLowerCase
// [SPARK-17599] Prevent InMemoryFileIndex from failing if path doesn't exist
// Note that statuses only include FileStatus for the files and dirs directly under path,
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
index 03980922ab..c3583209ef 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.datasources
import java.lang.{Double => JDouble, Long => JLong}
import java.math.{BigDecimal => JBigDecimal}
-import java.util.TimeZone
+import java.util.{Locale, TimeZone}
import scala.collection.mutable.ArrayBuffer
import scala.util.Try
@@ -194,7 +194,7 @@ object PartitioningUtils {
while (!finished) {
// Sometimes (e.g., when speculative task is enabled), temporary directories may be left
// uncleaned. Here we simply ignore them.
- if (currentPath.getName.toLowerCase == "_temporary") {
+ if (currentPath.getName.toLowerCase(Locale.ROOT) == "_temporary") {
return (None, None)
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala
index 4994b8dc80..62e4c6e4b4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala
@@ -71,9 +71,9 @@ class CSVOptions(
val param = parameters.getOrElse(paramName, default.toString)
if (param == null) {
default
- } else if (param.toLowerCase == "true") {
+ } else if (param.toLowerCase(Locale.ROOT) == "true") {
true
- } else if (param.toLowerCase == "false") {
+ } else if (param.toLowerCase(Locale.ROOT) == "false") {
false
} else {
throw new Exception(s"$paramName flag can be true or false")
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ddl.scala
index 110d503f91..f8d4a9bb5b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ddl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ddl.scala
@@ -17,6 +17,8 @@
package org.apache.spark.sql.execution.datasources
+import java.util.Locale
+
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogUtils}
@@ -75,7 +77,7 @@ case class CreateTempViewUsing(
}
def run(sparkSession: SparkSession): Seq[Row] = {
- if (provider.toLowerCase == DDLUtils.HIVE_PROVIDER) {
+ if (provider.toLowerCase(Locale.ROOT) == DDLUtils.HIVE_PROVIDER) {
throw new AnalysisException("Hive data source can only be used with tables, " +
"you can't use it with CREATE TEMP VIEW USING")
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala
index 89fe86c038..591096d5ef 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala
@@ -18,7 +18,7 @@
package org.apache.spark.sql.execution.datasources.jdbc
import java.sql.{Connection, DriverManager}
-import java.util.Properties
+import java.util.{Locale, Properties}
import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
@@ -55,7 +55,7 @@ class JDBCOptions(
*/
val asConnectionProperties: Properties = {
val properties = new Properties()
- parameters.originalMap.filterKeys(key => !jdbcOptionNames(key.toLowerCase))
+ parameters.originalMap.filterKeys(key => !jdbcOptionNames(key.toLowerCase(Locale.ROOT)))
.foreach { case (k, v) => properties.setProperty(k, v) }
properties
}
@@ -141,7 +141,7 @@ object JDBCOptions {
private val jdbcOptionNames = collection.mutable.Set[String]()
private def newOption(name: String): String = {
- jdbcOptionNames += name.toLowerCase
+ jdbcOptionNames += name.toLowerCase(Locale.ROOT)
name
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala
index 774d1ba194..5fc3c2753b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala
@@ -18,6 +18,7 @@
package org.apache.spark.sql.execution.datasources.jdbc
import java.sql.{Connection, Driver, DriverManager, PreparedStatement, ResultSet, ResultSetMetaData, SQLException}
+import java.util.Locale
import scala.collection.JavaConverters._
import scala.util.Try
@@ -542,7 +543,7 @@ object JdbcUtils extends Logging {
case ArrayType(et, _) =>
// remove type length parameters from end of type name
val typeName = getJdbcType(et, dialect).databaseTypeDefinition
- .toLowerCase.split("\\(")(0)
+ .toLowerCase(Locale.ROOT).split("\\(")(0)
(stmt: PreparedStatement, row: Row, pos: Int) =>
val array = conn.createArrayOf(
typeName,
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala
index bdda299a62..772d4565de 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala
@@ -17,6 +17,8 @@
package org.apache.spark.sql.execution.datasources.parquet
+import java.util.Locale
+
import org.apache.parquet.hadoop.metadata.CompressionCodecName
import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
@@ -40,9 +42,11 @@ private[parquet] class ParquetOptions(
* Acceptable values are defined in [[shortParquetCompressionCodecNames]].
*/
val compressionCodecClassName: String = {
- val codecName = parameters.getOrElse("compression", sqlConf.parquetCompressionCodec).toLowerCase
+ val codecName = parameters.getOrElse("compression",
+ sqlConf.parquetCompressionCodec).toLowerCase(Locale.ROOT)
if (!shortParquetCompressionCodecNames.contains(codecName)) {
- val availableCodecs = shortParquetCompressionCodecNames.keys.map(_.toLowerCase)
+ val availableCodecs =
+ shortParquetCompressionCodecNames.keys.map(_.toLowerCase(Locale.ROOT))
throw new IllegalArgumentException(s"Codec [$codecName] " +
s"is not available. Available codecs are ${availableCodecs.mkString(", ")}.")
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
index 8b598cc60e..7abf2ae516 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
@@ -17,6 +17,8 @@
package org.apache.spark.sql.execution.datasources
+import java.util.Locale
+
import org.apache.spark.sql.{AnalysisException, SaveMode, SparkSession}
import org.apache.spark.sql.catalyst.analysis._
import org.apache.spark.sql.catalyst.catalog._
@@ -48,7 +50,8 @@ class ResolveSQLOnFile(sparkSession: SparkSession) extends Rule[LogicalPlan] {
// will catch it and return the original plan, so that the analyzer can report table not
// found later.
val isFileFormat = classOf[FileFormat].isAssignableFrom(dataSource.providingClass)
- if (!isFileFormat || dataSource.className.toLowerCase == DDLUtils.HIVE_PROVIDER) {
+ if (!isFileFormat ||
+ dataSource.className.toLowerCase(Locale.ROOT) == DDLUtils.HIVE_PROVIDER) {
throw new AnalysisException("Unsupported data source type for direct query on files: " +
s"${u.tableIdentifier.database.get}")
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala
index f9dd80230e..1426728f9b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala
@@ -18,6 +18,7 @@
package org.apache.spark.sql.execution.streaming.state
import java.io.{DataInputStream, DataOutputStream, FileNotFoundException, IOException}
+import java.util.Locale
import scala.collection.JavaConverters._
import scala.collection.mutable
@@ -599,7 +600,7 @@ private[state] class HDFSBackedStateStoreProvider(
val nameParts = path.getName.split("\\.")
if (nameParts.size == 2) {
val version = nameParts(0).toLong
- nameParts(1).toLowerCase match {
+ nameParts(1).toLowerCase(Locale.ROOT) match {
case "delta" =>
// ignore the file otherwise, snapshot file already exists for that batch id
if (!versionToFiles.contains(version)) {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala
index ca46a1151e..b9515ec7bc 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala
@@ -17,6 +17,8 @@
package org.apache.spark.sql.internal
+import java.util.Locale
+
import org.apache.spark.sql.catalyst.catalog.CatalogStorageFormat
case class HiveSerDe(
@@ -68,7 +70,7 @@ object HiveSerDe {
* @return HiveSerDe associated with the specified source
*/
def sourceToSerDe(source: String): Option[HiveSerDe] = {
- val key = source.toLowerCase match {
+ val key = source.toLowerCase(Locale.ROOT) match {
case s if s.startsWith("org.apache.spark.sql.parquet") => "parquet"
case s if s.startsWith("org.apache.spark.sql.orc") => "orc"
case s if s.equals("orcfile") => "orc"
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
index 1ef9d52713..0289471bf8 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
@@ -21,7 +21,6 @@ import scala.reflect.ClassTag
import scala.util.control.NonFatal
import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.fs.Path
import org.apache.spark.{SparkConf, SparkContext, SparkException}
import org.apache.spark.internal.Logging
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala
index c3a9cfc085..746b2a94f1 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala
@@ -17,6 +17,8 @@
package org.apache.spark.sql.streaming
+import java.util.Locale
+
import scala.collection.JavaConverters._
import org.apache.spark.annotation.{Experimental, InterfaceStability}
@@ -135,7 +137,7 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
* @since 2.0.0
*/
def load(): DataFrame = {
- if (source.toLowerCase == DDLUtils.HIVE_PROVIDER) {
+ if (source.toLowerCase(Locale.ROOT) == DDLUtils.HIVE_PROVIDER) {
throw new AnalysisException("Hive data source can only be used with tables, you can not " +
"read files of Hive data source directly.")
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala
index f2f700590c..0d2611f9bb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala
@@ -17,6 +17,8 @@
package org.apache.spark.sql.streaming
+import java.util.Locale
+
import scala.collection.JavaConverters._
import org.apache.spark.annotation.{Experimental, InterfaceStability}
@@ -230,7 +232,7 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) {
* @since 2.0.0
*/
def start(): StreamingQuery = {
- if (source.toLowerCase == DDLUtils.HIVE_PROVIDER) {
+ if (source.toLowerCase(Locale.ROOT) == DDLUtils.HIVE_PROVIDER) {
throw new AnalysisException("Hive data source can only be used with tables, you can not " +
"write files of Hive data source directly.")
}
diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java
index 78cf033dd8..3ba37addfc 100644
--- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java
+++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java
@@ -119,7 +119,7 @@ public class JavaDatasetSuite implements Serializable {
Dataset<String> parMapped = ds.mapPartitions((MapPartitionsFunction<String, String>) it -> {
List<String> ls = new LinkedList<>();
while (it.hasNext()) {
- ls.add(it.next().toUpperCase(Locale.ENGLISH));
+ ls.add(it.next().toUpperCase(Locale.ROOT));
}
return ls.iterator();
}, Encoders.STRING());
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
index 4b69baffab..d9130fdcfa 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
@@ -124,7 +124,8 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext {
}
private def createScalaTestCase(testCase: TestCase): Unit = {
- if (blackList.exists(t => testCase.name.toLowerCase.contains(t.toLowerCase))) {
+ if (blackList.exists(t =>
+ testCase.name.toLowerCase(Locale.ROOT).contains(t.toLowerCase(Locale.ROOT)))) {
// Create a test case to ignore this case.
ignore(testCase.name) { /* Do nothing */ }
} else {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala
index 8bceab39f7..1c1931b6a6 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala
@@ -16,6 +16,8 @@
*/
package org.apache.spark.sql.execution
+import java.util.Locale
+
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation}
import org.apache.spark.sql.test.SharedSQLContext
@@ -24,11 +26,12 @@ class QueryExecutionSuite extends SharedSQLContext {
test("toString() exception/error handling") {
val badRule = new SparkStrategy {
var mode: String = ""
- override def apply(plan: LogicalPlan): Seq[SparkPlan] = mode.toLowerCase match {
- case "exception" => throw new AnalysisException(mode)
- case "error" => throw new Error(mode)
- case _ => Nil
- }
+ override def apply(plan: LogicalPlan): Seq[SparkPlan] =
+ mode.toLowerCase(Locale.ROOT) match {
+ case "exception" => throw new AnalysisException(mode)
+ case "error" => throw new Error(mode)
+ case _ => Nil
+ }
}
spark.experimental.extraStrategies = badRule :: Nil
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
index 13202a5785..97c61dc869 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
@@ -18,6 +18,7 @@
package org.apache.spark.sql.execution.command
import java.net.URI
+import java.util.Locale
import scala.reflect.{classTag, ClassTag}
@@ -40,8 +41,10 @@ class DDLCommandSuite extends PlanTest {
val e = intercept[ParseException] {
parser.parsePlan(sql)
}
- assert(e.getMessage.toLowerCase.contains("operation not allowed"))
- containsThesePhrases.foreach { p => assert(e.getMessage.toLowerCase.contains(p.toLowerCase)) }
+ assert(e.getMessage.toLowerCase(Locale.ROOT).contains("operation not allowed"))
+ containsThesePhrases.foreach { p =>
+ assert(e.getMessage.toLowerCase(Locale.ROOT).contains(p.toLowerCase(Locale.ROOT)))
+ }
}
private def parseAs[T: ClassTag](query: String): T = {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index 9ebf2dd839..fe74ab49f9 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.command
import java.io.File
import java.net.URI
+import java.util.Locale
import org.apache.hadoop.fs.Path
import org.scalatest.BeforeAndAfterEach
@@ -190,7 +191,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
val e = intercept[AnalysisException] {
sql(query)
}
- assert(e.getMessage.toLowerCase.contains("operation not allowed"))
+ assert(e.getMessage.toLowerCase(Locale.ROOT).contains("operation not allowed"))
}
private def maybeWrapException[T](expectException: Boolean)(body: => T): Unit = {
@@ -1813,7 +1814,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
withTable(tabName) {
sql(s"CREATE TABLE $tabName(col1 int, col2 string) USING parquet ")
val message = intercept[AnalysisException] {
- sql(s"SHOW COLUMNS IN $db.showcolumn FROM ${db.toUpperCase}")
+ sql(s"SHOW COLUMNS IN $db.showcolumn FROM ${db.toUpperCase(Locale.ROOT)}")
}.getMessage
assert(message.contains("SHOW COLUMNS with conflicting databases"))
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
index 57a0af1dda..94a2f9a00b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
@@ -17,6 +17,8 @@
package org.apache.spark.sql.execution.datasources.parquet
+import java.util.Locale
+
import scala.collection.JavaConverters._
import scala.collection.mutable
import scala.reflect.ClassTag
@@ -300,7 +302,7 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSQLContext {
def checkCompressionCodec(codec: CompressionCodecName): Unit = {
withSQLConf(SQLConf.PARQUET_COMPRESSION.key -> codec.name()) {
withParquetFile(data) { path =>
- assertResult(spark.conf.get(SQLConf.PARQUET_COMPRESSION).toUpperCase) {
+ assertResult(spark.conf.get(SQLConf.PARQUET_COMPRESSION).toUpperCase(Locale.ROOT)) {
compressionCodecFor(path, codec.name())
}
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala
index 2b20b9716b..b4f3de9961 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala
@@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.datasources.parquet
import java.io.File
import java.math.BigInteger
import java.sql.{Date, Timestamp}
-import java.util.{Calendar, TimeZone}
+import java.util.{Calendar, Locale, TimeZone}
import scala.collection.mutable.ArrayBuffer
@@ -476,7 +476,8 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha
assert(partDf.schema.map(_.name) === Seq("intField", "stringField"))
path.listFiles().foreach { f =>
- if (!f.getName.startsWith("_") && f.getName.toLowerCase().endsWith(".parquet")) {
+ if (!f.getName.startsWith("_") &&
+ f.getName.toLowerCase(Locale.ROOT).endsWith(".parquet")) {
// when the input is a path to a parquet file
val df = spark.read.parquet(f.getCanonicalPath)
assert(df.schema.map(_.name) === Seq("intField", "stringField"))
@@ -484,7 +485,8 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha
}
path.listFiles().foreach { f =>
- if (!f.getName.startsWith("_") && f.getName.toLowerCase().endsWith(".parquet")) {
+ if (!f.getName.startsWith("_") &&
+ f.getName.toLowerCase(Locale.ROOT).endsWith(".parquet")) {
// when the input is a path to a parquet file but `basePath` is overridden to
// the base path containing partitioning directories
val df = spark
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/FilteredScanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/FilteredScanSuite.scala
index be56c964a1..5a0388ec1d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/FilteredScanSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/FilteredScanSuite.scala
@@ -17,6 +17,8 @@
package org.apache.spark.sql.sources
+import java.util.Locale
+
import scala.language.existentials
import org.apache.spark.rdd.RDD
@@ -76,7 +78,7 @@ case class SimpleFilteredScan(from: Int, to: Int)(@transient val sparkSession: S
case "b" => (i: Int) => Seq(i * 2)
case "c" => (i: Int) =>
val c = (i - 1 + 'a').toChar.toString
- Seq(c * 5 + c.toUpperCase * 5)
+ Seq(c * 5 + c.toUpperCase(Locale.ROOT) * 5)
}
FiltersPushed.list = filters
@@ -113,7 +115,8 @@ case class SimpleFilteredScan(from: Int, to: Int)(@transient val sparkSession: S
}
def eval(a: Int) = {
- val c = (a - 1 + 'a').toChar.toString * 5 + (a - 1 + 'a').toChar.toString.toUpperCase * 5
+ val c = (a - 1 + 'a').toChar.toString * 5 +
+ (a - 1 + 'a').toChar.toString.toUpperCase(Locale.ROOT) * 5
filters.forall(translateFilterOnA(_)(a)) && filters.forall(translateFilterOnC(_)(c))
}
@@ -151,7 +154,7 @@ class FilteredScanSuite extends DataSourceTest with SharedSQLContext with Predic
sqlTest(
"SELECT * FROM oneToTenFiltered",
(1 to 10).map(i => Row(i, i * 2, (i - 1 + 'a').toChar.toString * 5
- + (i - 1 + 'a').toChar.toString.toUpperCase * 5)).toSeq)
+ + (i - 1 + 'a').toChar.toString.toUpperCase(Locale.ROOT) * 5)).toSeq)
sqlTest(
"SELECT a, b FROM oneToTenFiltered",
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala
index f67444fbc4..1211242b9f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala
@@ -17,6 +17,8 @@
package org.apache.spark.sql.streaming
+import java.util.Locale
+
import org.apache.spark.sql.{AnalysisException, DataFrame}
import org.apache.spark.sql.execution.DataSourceScanExec
import org.apache.spark.sql.execution.datasources._
@@ -221,7 +223,7 @@ class FileStreamSinkSuite extends StreamTest {
df.writeStream.format("parquet").outputMode(mode).start(dir.getCanonicalPath)
}
Seq(mode, "not support").foreach { w =>
- assert(e.getMessage.toLowerCase.contains(w))
+ assert(e.getMessage.toLowerCase(Locale.ROOT).contains(w))
}
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala
index e5d5b4f328..f796a4cb4a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala
@@ -17,7 +17,7 @@
package org.apache.spark.sql.streaming
-import java.util.TimeZone
+import java.util.{Locale, TimeZone}
import org.scalatest.BeforeAndAfterAll
@@ -105,7 +105,7 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with BeforeAndAfte
testStream(aggregated, Append)()
}
Seq("append", "not supported").foreach { m =>
- assert(e.getMessage.toLowerCase.contains(m.toLowerCase))
+ assert(e.getMessage.toLowerCase(Locale.ROOT).contains(m.toLowerCase(Locale.ROOT)))
}
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala
index 05cd3d9f7c..dc2506a48a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala
@@ -18,6 +18,7 @@
package org.apache.spark.sql.streaming.test
import java.io.File
+import java.util.Locale
import java.util.concurrent.TimeUnit
import scala.concurrent.duration._
@@ -126,7 +127,7 @@ class DataStreamReaderWriterSuite extends StreamTest with BeforeAndAfter {
.save()
}
Seq("'write'", "not", "streaming Dataset/DataFrame").foreach { s =>
- assert(e.getMessage.toLowerCase.contains(s.toLowerCase))
+ assert(e.getMessage.toLowerCase(Locale.ROOT).contains(s.toLowerCase(Locale.ROOT)))
}
}
@@ -400,7 +401,7 @@ class DataStreamReaderWriterSuite extends StreamTest with BeforeAndAfter {
var w = df.writeStream
var e = intercept[IllegalArgumentException](w.foreach(null))
Seq("foreach", "null").foreach { s =>
- assert(e.getMessage.toLowerCase.contains(s.toLowerCase))
+ assert(e.getMessage.toLowerCase(Locale.ROOT).contains(s.toLowerCase(Locale.ROOT)))
}
}
@@ -417,7 +418,7 @@ class DataStreamReaderWriterSuite extends StreamTest with BeforeAndAfter {
var w = df.writeStream.partitionBy("value")
var e = intercept[AnalysisException](w.foreach(foreachWriter).start())
Seq("foreach", "partitioning").foreach { s =>
- assert(e.getMessage.toLowerCase.contains(s.toLowerCase))
+ assert(e.getMessage.toLowerCase(Locale.ROOT).contains(s.toLowerCase(Locale.ROOT)))
}
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala
index 7c71e7280c..fb15e7def6 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala
@@ -18,6 +18,7 @@
package org.apache.spark.sql.test
import java.io.File
+import java.util.Locale
import java.util.concurrent.ConcurrentLinkedQueue
import org.scalatest.BeforeAndAfter
@@ -144,7 +145,7 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSQLContext with Be
.start()
}
Seq("'writeStream'", "only", "streaming Dataset/DataFrame").foreach { s =>
- assert(e.getMessage.toLowerCase.contains(s.toLowerCase))
+ assert(e.getMessage.toLowerCase(Locale.ROOT).contains(s.toLowerCase(Locale.ROOT)))
}
}
@@ -276,13 +277,13 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSQLContext with Be
var w = df.write.partitionBy("value")
var e = intercept[AnalysisException](w.jdbc(null, null, null))
Seq("jdbc", "partitioning").foreach { s =>
- assert(e.getMessage.toLowerCase.contains(s.toLowerCase))
+ assert(e.getMessage.toLowerCase(Locale.ROOT).contains(s.toLowerCase(Locale.ROOT)))
}
w = df.write.bucketBy(2, "value")
e = intercept[AnalysisException](w.jdbc(null, null, null))
Seq("jdbc", "bucketing").foreach { s =>
- assert(e.getMessage.toLowerCase.contains(s.toLowerCase))
+ assert(e.getMessage.toLowerCase(Locale.ROOT).contains(s.toLowerCase(Locale.ROOT)))
}
}
@@ -385,7 +386,8 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSQLContext with Be
// Reader, with user specified schema, should just apply user schema on the file data
val e = intercept[AnalysisException] { spark.read.schema(userSchema).textFile() }
- assert(e.getMessage.toLowerCase.contains("user specified schema not supported"))
+ assert(e.getMessage.toLowerCase(Locale.ROOT).contains(
+ "user specified schema not supported"))
intercept[AnalysisException] { spark.read.schema(userSchema).textFile(dir) }
intercept[AnalysisException] { spark.read.schema(userSchema).textFile(dir, dir) }
intercept[AnalysisException] { spark.read.schema(userSchema).textFile(Seq(dir, dir): _*) }