diff options
author | Sean Owen <sowen@cloudera.com> | 2017-04-10 20:11:56 +0100 |
---|---|---|
committer | Sean Owen <sowen@cloudera.com> | 2017-04-10 20:11:56 +0100 |
commit | a26e3ed5e414d0a350cfe65dd511b154868b9f1d (patch) | |
tree | f8bf8feabae7acdd5b2c29e38273fddb80e3de33 /common | |
parent | fd711ea13e558f0e7d3e01f08e01444d394499a6 (diff) | |
download | spark-a26e3ed5e414d0a350cfe65dd511b154868b9f1d.tar.gz spark-a26e3ed5e414d0a350cfe65dd511b154868b9f1d.tar.bz2 spark-a26e3ed5e414d0a350cfe65dd511b154868b9f1d.zip |
[SPARK-20156][CORE][SQL][STREAMING][MLLIB] Java String toLowerCase "Turkish locale bug" causes Spark problems
## What changes were proposed in this pull request?
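Add `Locale.ROOT` to internal calls to `String.toLowerCase` and `String.toUpperCase` to avoid inadvertent locale-sensitive variation in behavior (aka the "Turkish locale problem"). For example, under a Turkish default locale, `"I".toLowerCase()` returns the dotless `"ı"` (U+0131) rather than `"i"`, so case-insensitive matching of ASCII keywords and configuration values can silently fail.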
The change looks large, but it only adds `Locale.ROOT` (the locale with no country or language specified) to every call to these methods.
## How was this patch tested?
Existing tests.
Author: Sean Owen <sowen@cloudera.com>
Closes #17527 from srowen/SPARK-20156.
Diffstat (limited to 'common')
-rw-r--r-- | common/network-common/src/main/java/org/apache/spark/network/util/JavaUtils.java | 5 | ||||
-rw-r--r-- | common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java | 5 |
2 files changed, 7 insertions, 3 deletions
diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/JavaUtils.java b/common/network-common/src/main/java/org/apache/spark/network/util/JavaUtils.java index 51d7fda0cb..afc59efaef 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/util/JavaUtils.java +++ b/common/network-common/src/main/java/org/apache/spark/network/util/JavaUtils.java @@ -24,6 +24,7 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.nio.channels.ReadableByteChannel; import java.nio.charset.StandardCharsets; +import java.util.Locale; import java.util.concurrent.TimeUnit; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -210,7 +211,7 @@ public class JavaUtils { * The unit is also considered the default if the given string does not specify a unit. */ public static long timeStringAs(String str, TimeUnit unit) { - String lower = str.toLowerCase().trim(); + String lower = str.toLowerCase(Locale.ROOT).trim(); try { Matcher m = Pattern.compile("(-?[0-9]+)([a-z]+)?").matcher(lower); @@ -258,7 +259,7 @@ public class JavaUtils { * provided, a direct conversion to the provided unit is attempted. 
*/ public static long byteStringAs(String str, ByteUnit unit) { - String lower = str.toLowerCase().trim(); + String lower = str.toLowerCase(Locale.ROOT).trim(); try { Matcher m = Pattern.compile("([0-9]+)([a-z]+)?").matcher(lower); diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java b/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java index c226d8f3bc..a25078e262 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java +++ b/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java @@ -17,6 +17,7 @@ package org.apache.spark.network.util; +import java.util.Locale; import java.util.Properties; import com.google.common.primitives.Ints; @@ -75,7 +76,9 @@ public class TransportConf { } /** IO mode: nio or epoll */ - public String ioMode() { return conf.get(SPARK_NETWORK_IO_MODE_KEY, "NIO").toUpperCase(); } + public String ioMode() { + return conf.get(SPARK_NETWORK_IO_MODE_KEY, "NIO").toUpperCase(Locale.ROOT); + } /** If true, we will prefer allocating off-heap byte buffers within Netty. */ public boolean preferDirectBufs() { |