aboutsummaryrefslogtreecommitdiff
path: root/unsafe
diff options
context:
space:
mode:
Diffstat (limited to 'unsafe')
-rw-r--r--unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java88
-rw-r--r--unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java8
2 files changed, 96 insertions, 0 deletions
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
index 208503d2fd..213dc761bb 100644
--- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
+++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
@@ -279,6 +279,29 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable {
* Returns the upper case of this string
*/
public UTF8String toUpperCase() {
+   // Nothing to convert for an empty string.
+   if (numBytes == 0) {
+     return EMPTY_UTF8;
+   }
+
+   // Fast path: convert one byte at a time for as long as every byte is a
+   // one-byte character; anything else hands the whole string to toUpperCaseSlow().
+   // (No separate handling of byte 0 is needed: the loop starts at index 0.)
+   byte[] bytes = new byte[numBytes];
+   for (int i = 0; i < numBytes; i++) {
+     byte b = getByte(i);
+     if (numBytesForFirstByte(b) != 1) {
+       // fallback: numBytesForFirstByte does not report b as a one-byte character
+       return toUpperCaseSlow();
+     }
+     int upper = Character.toUpperCase((int) b);
+     if (upper > 127) {
+       // fallback: the converted value is above 127 and cannot be stored as one byte here
+       return toUpperCaseSlow();
+     }
+     bytes[i] = (byte) upper;
+   }
+   return fromBytes(bytes);
+ }
+
+ private UTF8String toUpperCaseSlow() {
return fromString(toString().toUpperCase());
}
@@ -286,10 +309,75 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable {
* Returns the lower case of this string
*/
public UTF8String toLowerCase() {
+   // Nothing to convert for an empty string.
+   if (numBytes == 0) {
+     return EMPTY_UTF8;
+   }
+
+   // Fast path: convert one byte at a time for as long as every byte is a
+   // one-byte character; anything else hands the whole string to toLowerCaseSlow().
+   // (No separate handling of byte 0 is needed: the loop starts at index 0.)
+   byte[] bytes = new byte[numBytes];
+   for (int i = 0; i < numBytes; i++) {
+     byte b = getByte(i);
+     if (numBytesForFirstByte(b) != 1) {
+       // fallback: numBytesForFirstByte does not report b as a one-byte character
+       return toLowerCaseSlow();
+     }
+     int lower = Character.toLowerCase((int) b);
+     if (lower > 127) {
+       // fallback: the converted value is above 127 and cannot be stored as one byte here
+       return toLowerCaseSlow();
+     }
+     bytes[i] = (byte) lower;
+   }
+   return fromBytes(bytes);
+ }
+
+ private UTF8String toLowerCaseSlow() {
return fromString(toString().toLowerCase());
}
/**
+ * Returns the title case of this string: the first character, and every character that directly follows a space, is converted to title case.
+ */
+ public UTF8String toTitleCase() {
+   if (numBytes == 0) {
+     return EMPTY_UTF8;
+   }
+
+   final byte[] titled = new byte[numBytes];
+   // True when the byte at pos opens a word: position 0, or the previous byte was a space.
+   boolean opensWord = true;
+   for (int pos = 0; pos < numBytes; pos++) {
+     final byte current = getByte(pos);
+     if (!opensWord) {
+       // Bytes that do not open a word are copied through unchanged.
+       titled[pos] = current;
+     } else {
+       if (numBytesForFirstByte(current) != 1) {
+         // Word opens with something other than a one-byte character: use the String-based path.
+         return toTitleCaseSlow();
+       }
+       final int converted = Character.toTitleCase(current);
+       if (converted > 127) {
+         // Converted value is above 127 and cannot be stored as one byte here.
+         return toTitleCaseSlow();
+       }
+       titled[pos] = (byte) converted;
+     }
+     opensWord = current == ' ';
+   }
+   return fromBytes(titled);
+ }
+
+ /**
+  * Title-cases this string on its {@link String} form: the first char, and every char
+  * that directly follows a space, is passed through Character.toTitleCase(char).
+  * Works per UTF-16 char, so each half of a surrogate pair is handled on its own.
+  */
+ private UTF8String toTitleCaseSlow() {
+   // The buffer never leaves this method, so the unsynchronized StringBuilder is enough.
+   StringBuilder sb = new StringBuilder(toString());
+   for (int i = 0; i < sb.length(); i++) {
+     // i == 0 is tested first, so charAt(i - 1) is only evaluated for i >= 1;
+     // an empty buffer simply skips the loop instead of failing on index 0.
+     if (i == 0 || sb.charAt(i - 1) == ' ') {
+       sb.setCharAt(i, Character.toTitleCase(sb.charAt(i)));
+     }
+   }
+   return fromString(sb.toString());
+ }
+
+ /**
* Copy the bytes from the current UTF8String, and make a new UTF8String.
* @param start the start position of the current UTF8String in bytes.
* @param end the end position of the current UTF8String in bytes.
diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java b/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
index ed50cdcb29..9b3190f8f0 100644
--- a/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
+++ b/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
@@ -115,6 +115,14 @@ public class UTF8StringSuite {
}
@Test
+ public void titleCase() {
+ assertEquals(fromString(""), fromString("").toTitleCase());
+ assertEquals(fromString("Ab Bc Cd"), fromString("ab bc cd").toTitleCase());
+ assertEquals(fromString("Ѐ Ё Ђ Ѻ Ώ Ề"), fromString("ѐ ё ђ ѻ ώ ề").toTitleCase());
+ assertEquals(fromString("大千世界 数据砖头"), fromString("大千世界 数据砖头").toTitleCase());
+ }
+
+ @Test
public void concatTest() {
assertEquals(EMPTY_UTF8, concat());
assertEquals(null, concat((UTF8String) null));