diff options
author | Davies Liu <davies@databricks.com> | 2015-07-02 15:58:42 -0700 |
---|---|---|
committer | Davies Liu <davies@databricks.com> | 2015-07-02 15:58:42 -0700 |
commit | 3f1e4efbd8f67a3bb1a9e33be91e2a07e0aa01b0 (patch) | |
tree | d6d390185a59e3b0bcb67d3550add12a54427305 | |
parent | bc51bcaea734fe64a90d007559e76f5ceebfea9e (diff) | |
download | spark-3f1e4efbd8f67a3bb1a9e33be91e2a07e0aa01b0.tar.gz spark-3f1e4efbd8f67a3bb1a9e33be91e2a07e0aa01b0.tar.bz2 spark-3f1e4efbd8f67a3bb1a9e33be91e2a07e0aa01b0.zip |
Fix UTF8String ordering for strings containing non-ASCII characters (compare bytes as unsigned values)
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/types/UTF8String.scala | 3 | ||||
-rw-r--r-- | sql/catalyst/src/test/scala/org/apache/spark/sql/types/UTF8StringSuite.scala | 5 |
2 files changed, 7 insertions, 1 deletion
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UTF8String.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UTF8String.scala
index f5d8fcced3..e17743a9f0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UTF8String.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UTF8String.scala
@@ -156,7 +156,8 @@ final class UTF8String extends Ordered[UTF8String] with Serializable {
     var i: Int = 0
     val b = other.getBytes
     while (i < bytes.length && i < b.length) {
-      val res = bytes(i).compareTo(b(i))
+      // UTF8 should be compared as unsigned int
+      val res = (bytes(i) & 0xFF) - (b(i) & 0xFF)
       if (res != 0) return res
       i += 1
     }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/UTF8StringSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/UTF8StringSuite.scala
index 81d7ab010f..02feee4313 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/UTF8StringSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/UTF8StringSuite.scala
@@ -40,6 +40,11 @@ class UTF8StringSuite extends SparkFunSuite {
     check("世 界", 3)
   }
 
+  test("compare") {
+    assert(UTF8String("abc") > UTF8String("ABC"))
+    assert(UTF8String("世 界") > UTF8String("abc"))
+  }
+
   test("contains") {
     assert(UTF8String("hello").contains(UTF8String("ello")))
     assert(!UTF8String("hello").contains(UTF8String("vello")))