aboutsummaryrefslogtreecommitdiff
path: root/java
diff options
context:
space:
mode:
author Viktor Szathmáry <phraktle@gmail.com> 2014-09-09 16:31:51 +0200
committer Tamir Duberstein <tamird@gmail.com> 2015-04-02 14:48:43 -0700
commit e84893f6768f136cc86e2db69fc1d40ff2be7e3b (patch)
tree c36057efe7fc3c3bf50381c96bae16cf73234fa5 /java
parent 7139d1eff739682a088ea2c2dbdfef2f108321f8 (diff)
download protobuf-e84893f6768f136cc86e2db69fc1d40ff2be7e3b.tar.gz
protobuf-e84893f6768f136cc86e2db69fc1d40ff2be7e3b.tar.bz2
protobuf-e84893f6768f136cc86e2db69fc1d40ff2be7e3b.zip
perf: String#getBytes(Charset) vs getBytes(String)
Diffstat (limited to 'java')
-rw-r--r-- java/src/main/java/com/google/protobuf/ByteString.java 49
-rw-r--r-- java/src/main/java/com/google/protobuf/LiteralByteString.java 10
-rw-r--r-- java/src/main/java/com/google/protobuf/RopeByteString.java 10
-rw-r--r-- java/src/test/java/com/google/protobuf/BoundedByteStringTest.java 13
-rw-r--r-- java/src/test/java/com/google/protobuf/LiteralByteStringTest.java 7
-rw-r--r-- java/src/test/java/com/google/protobuf/RopeByteStringSubstringTest.java 30
-rw-r--r-- java/src/test/java/com/google/protobuf/RopeByteStringTest.java 26
7 files changed, 117 insertions, 28 deletions
diff --git a/java/src/main/java/com/google/protobuf/ByteString.java b/java/src/main/java/com/google/protobuf/ByteString.java
index 637df5f4..ee2eddb0 100644
--- a/java/src/main/java/com/google/protobuf/ByteString.java
+++ b/java/src/main/java/com/google/protobuf/ByteString.java
@@ -37,6 +37,8 @@ import java.io.OutputStream;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.UnsupportedCharsetException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
@@ -76,8 +78,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
static final int MIN_READ_FROM_CHUNK_SIZE = 0x100; // 256b
static final int MAX_READ_FROM_CHUNK_SIZE = 0x2000; // 8k
- // Defined by java.nio.charset.Charset
- protected static final String UTF_8 = "UTF-8";
+ protected static final Charset UTF_8 = Charset.forName("UTF-8");
/**
* Empty {@code ByteString}.
@@ -269,11 +270,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
* @return new {@code ByteString}
*/
public static ByteString copyFromUtf8(String text) {
- try {
- return new LiteralByteString(text.getBytes(UTF_8));
- } catch (UnsupportedEncodingException e) {
- throw new RuntimeException("UTF-8 not supported?", e);
- }
+ return new LiteralByteString(text.getBytes(UTF_8));
}
// =================================================================
@@ -612,8 +609,36 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
* @return new string
* @throws UnsupportedEncodingException if charset isn't recognized
*/
- public abstract String toString(String charsetName)
- throws UnsupportedEncodingException;
+ public String toString(String charsetName)
+ throws UnsupportedEncodingException {
+ try {
+ return toString(Charset.forName(charsetName));
+ } catch (UnsupportedCharsetException e) {
+ UnsupportedEncodingException exception = new UnsupportedEncodingException(charsetName);
+ exception.initCause(e);
+ throw exception;
+ }
+ }
+
+ /**
+ * Constructs a new {@code String} by decoding the bytes using the
+ * specified charset. Returns the empty string literal {@code ""} if this {@code ByteString} is empty.
+ *
+ * @param charset the charset used to decode the bytes
+ * @return new string
+ */
+ public String toString(Charset charset) {
+ return size() == 0 ? "" : toStringInternal(charset);
+ }
+
+ /**
+ * Constructs a new {@code String} by decoding the bytes using the
+ * specified charset.
+ *
+ * @param charset the charset used to decode the bytes
+ * @return new string
+ */
+ protected abstract String toStringInternal(Charset charset);
// =================================================================
// UTF-8 decoding
@@ -624,11 +649,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
* @return new string using UTF-8 encoding
*/
public String toStringUtf8() {
- try {
- return toString(UTF_8);
- } catch (UnsupportedEncodingException e) {
- throw new RuntimeException("UTF-8 not supported?", e);
- }
+ return toString(UTF_8);
}
/**
diff --git a/java/src/main/java/com/google/protobuf/LiteralByteString.java b/java/src/main/java/com/google/protobuf/LiteralByteString.java
index 81d8e74b..3462c395 100644
--- a/java/src/main/java/com/google/protobuf/LiteralByteString.java
+++ b/java/src/main/java/com/google/protobuf/LiteralByteString.java
@@ -36,6 +36,7 @@ import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.NoSuchElementException;
@@ -152,13 +153,8 @@ class LiteralByteString extends ByteString {
}
@Override
- public String toString(String charsetName)
- throws UnsupportedEncodingException {
- // Optimize for empty strings, but ensure we don't silently ignore invalid
- // encodings.
- return size() == 0 && UTF_8.equals(charsetName)
- ? ""
- : new String(bytes, getOffsetIntoBytes(), size(), charsetName);
+ protected String toStringInternal(Charset charset) {
+ return new String(bytes, getOffsetIntoBytes(), size(), charset);
}
// =================================================================
diff --git a/java/src/main/java/com/google/protobuf/RopeByteString.java b/java/src/main/java/com/google/protobuf/RopeByteString.java
index 168bcce2..0900a042 100644
--- a/java/src/main/java/com/google/protobuf/RopeByteString.java
+++ b/java/src/main/java/com/google/protobuf/RopeByteString.java
@@ -38,6 +38,7 @@ import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.io.ByteArrayInputStream;
import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
@@ -418,13 +419,8 @@ class RopeByteString extends ByteString {
}
@Override
- public String toString(String charsetName)
- throws UnsupportedEncodingException {
- // Optimize for empty strings, but ensure we don't silently ignore invalid
- // encodings.
- return size() == 0 && UTF_8.equals(charsetName)
- ? ""
- : new String(toByteArray(), charsetName);
+ protected String toStringInternal(Charset charset) {
+ return new String(toByteArray(), charset);
}
// =================================================================
diff --git a/java/src/test/java/com/google/protobuf/BoundedByteStringTest.java b/java/src/test/java/com/google/protobuf/BoundedByteStringTest.java
index 6c9596ca..a11bef2e 100644
--- a/java/src/test/java/com/google/protobuf/BoundedByteStringTest.java
+++ b/java/src/test/java/com/google/protobuf/BoundedByteStringTest.java
@@ -72,6 +72,19 @@ public class BoundedByteStringTest extends LiteralByteStringTest {
testString.substring(2, testString.length() - 6), roundTripString);
}
+ @Override
+ public void testCharsetToString() throws UnsupportedEncodingException {
+ String testString = "I love unicode \u1234\u5678 characters";
+ LiteralByteString unicode = new LiteralByteString(testString.getBytes(ByteString.UTF_8));
+ ByteString chopped = unicode.substring(2, unicode.size() - 6);
+ assertEquals(classUnderTest + ".substring() must have the expected type",
+ classUnderTest, getActualClassName(chopped));
+
+ String roundTripString = chopped.toString(ByteString.UTF_8);
+ assertEquals(classUnderTest + " unicode bytes must match",
+ testString.substring(2, testString.length() - 6), roundTripString);
+ }
+
public void testJavaSerialization() throws Exception {
ByteArrayOutputStream out = new ByteArrayOutputStream();
ObjectOutputStream oos = new ObjectOutputStream(out);
diff --git a/java/src/test/java/com/google/protobuf/LiteralByteStringTest.java b/java/src/test/java/com/google/protobuf/LiteralByteStringTest.java
index f3ad774f..8607040e 100644
--- a/java/src/test/java/com/google/protobuf/LiteralByteStringTest.java
+++ b/java/src/test/java/com/google/protobuf/LiteralByteStringTest.java
@@ -298,6 +298,13 @@ public class LiteralByteStringTest extends TestCase {
assertEquals(classUnderTest + " unicode must match", testString, roundTripString);
}
+ public void testCharsetToString() throws UnsupportedEncodingException {
+ String testString = "I love unicode \u1234\u5678 characters";
+ LiteralByteString unicode = new LiteralByteString(testString.getBytes(ByteString.UTF_8));
+ String roundTripString = unicode.toString(ByteString.UTF_8);
+ assertEquals(classUnderTest + " unicode must match", testString, roundTripString);
+ }
+
public void testToString_returnsCanonicalEmptyString() throws UnsupportedEncodingException{
assertSame(classUnderTest + " must be the same string references",
ByteString.EMPTY.toString(UTF_8), new LiteralByteString(new byte[]{}).toString(UTF_8));
diff --git a/java/src/test/java/com/google/protobuf/RopeByteStringSubstringTest.java b/java/src/test/java/com/google/protobuf/RopeByteStringSubstringTest.java
index 8106201d..43872d1d 100644
--- a/java/src/test/java/com/google/protobuf/RopeByteStringSubstringTest.java
+++ b/java/src/test/java/com/google/protobuf/RopeByteStringSubstringTest.java
@@ -94,4 +94,34 @@ public class RopeByteStringSubstringTest extends LiteralByteStringTest {
assertEquals(classUnderTest + " string must must have same hashCode as the flat string",
flatString.hashCode(), unicode.hashCode());
}
+
+ @Override
+ public void testCharsetToString() throws UnsupportedEncodingException {
+ String sourceString = "I love unicode \u1234\u5678 characters";
+ ByteString sourceByteString = ByteString.copyFromUtf8(sourceString);
+ int copies = 250;
+
+ // Building the RopeByteString by repeated concatenation makes this a fairly strenuous test.
+ StringBuilder builder = new StringBuilder(copies * sourceString.length());
+ ByteString unicode = ByteString.EMPTY;
+ for (int i = 0; i < copies; ++i) {
+ builder.append(sourceString);
+ unicode = RopeByteString.concatenate(unicode, sourceByteString);
+ }
+ String testString = builder.toString();
+
+ // Do the substring part
+ testString = testString.substring(2, testString.length() - 6);
+ unicode = unicode.substring(2, unicode.size() - 6);
+
+ assertEquals(classUnderTest + " from string must have the expected type",
+ classUnderTest, getActualClassName(unicode));
+ String roundTripString = unicode.toString(ByteString.UTF_8);
+ assertEquals(classUnderTest + " unicode bytes must match",
+ testString, roundTripString);
+ ByteString flatString = ByteString.copyFromUtf8(testString);
+ assertEquals(classUnderTest + " string must equal the flat string", flatString, unicode);
+ assertEquals(classUnderTest + " string must must have same hashCode as the flat string",
+ flatString.hashCode(), unicode.hashCode());
+ }
}
diff --git a/java/src/test/java/com/google/protobuf/RopeByteStringTest.java b/java/src/test/java/com/google/protobuf/RopeByteStringTest.java
index 4775f03a..54eb9683 100644
--- a/java/src/test/java/com/google/protobuf/RopeByteStringTest.java
+++ b/java/src/test/java/com/google/protobuf/RopeByteStringTest.java
@@ -119,6 +119,32 @@ public class RopeByteStringTest extends LiteralByteStringTest {
}
@Override
+ public void testCharsetToString() throws UnsupportedEncodingException {
+ String sourceString = "I love unicode \u1234\u5678 characters";
+ ByteString sourceByteString = ByteString.copyFromUtf8(sourceString);
+ int copies = 250;
+
+ // Building the RopeByteString by repeated concatenation makes this a fairly strenuous test.
+ StringBuilder builder = new StringBuilder(copies * sourceString.length());
+ ByteString unicode = ByteString.EMPTY;
+ for (int i = 0; i < copies; ++i) {
+ builder.append(sourceString);
+ unicode = RopeByteString.concatenate(unicode, sourceByteString);
+ }
+ String testString = builder.toString();
+
+ assertEquals(classUnderTest + " from string must have the expected type",
+ classUnderTest, getActualClassName(unicode));
+ String roundTripString = unicode.toString(ByteString.UTF_8);
+ assertEquals(classUnderTest + " unicode bytes must match",
+ testString, roundTripString);
+ ByteString flatString = ByteString.copyFromUtf8(testString);
+ assertEquals(classUnderTest + " string must equal the flat string", flatString, unicode);
+ assertEquals(classUnderTest + " string must must have same hashCode as the flat string",
+ flatString.hashCode(), unicode.hashCode());
+ }
+
+ @Override
public void testToString_returnsCanonicalEmptyString() throws UnsupportedEncodingException {
RopeByteString ropeByteString =
RopeByteString.newInstanceForTest(ByteString.EMPTY, ByteString.EMPTY);