about | summary | refs | log | tree | commit | diff
path: root/java/core/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java
diff options
context:
space:
mode:
Diffstat (limited to 'java/core/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java')
-rw-r--r-- java/core/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java | 202
1 files changed, 115 insertions, 87 deletions
diff --git a/java/core/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java b/java/core/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java
index 321669f3..16a808bf 100644
--- a/java/core/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java
+++ b/java/core/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java
@@ -30,9 +30,13 @@
package com.google.protobuf;
-import static junit.framework.Assert.*;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertSame;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
-import java.io.UnsupportedEncodingException;
+import java.lang.ref.SoftReference;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharsetDecoder;
@@ -52,64 +56,105 @@ import java.util.logging.Logger;
* @author jonp@google.com (Jon Perlow)
* @author martinrb@google.com (Martin Buchholz)
*/
-class IsValidUtf8TestUtil {
- private static Logger logger = Logger.getLogger(
- IsValidUtf8TestUtil.class.getName());
+final class IsValidUtf8TestUtil {
+ private static Logger logger = Logger.getLogger(IsValidUtf8TestUtil.class.getName());
+
+ private IsValidUtf8TestUtil() {}
+
+ static interface ByteStringFactory {
+ ByteString newByteString(byte[] bytes);
+ }
+
+ static final ByteStringFactory LITERAL_FACTORY = new ByteStringFactory() {
+ @Override
+ public ByteString newByteString(byte[] bytes) {
+ return ByteString.wrap(bytes);
+ }
+ };
+
+ static final ByteStringFactory HEAP_NIO_FACTORY = new ByteStringFactory() {
+ @Override
+ public ByteString newByteString(byte[] bytes) {
+ return new NioByteString(ByteBuffer.wrap(bytes));
+ }
+ };
+
+ private static ThreadLocal<SoftReference<ByteBuffer>> directBuffer =
+ new ThreadLocal<SoftReference<ByteBuffer>>();
+
+ /**
+ * Factory for direct {@link ByteBuffer} instances. To reduce direct memory usage, this
+ * uses a thread local direct buffer. This means that each call will overwrite the buffer's
+ * contents from the previous call, so the calling code must be careful not to continue using
+ * a buffer returned from a previous invocation.
+ */
+ static final ByteStringFactory DIRECT_NIO_FACTORY = new ByteStringFactory() {
+ @Override
+ public ByteString newByteString(byte[] bytes) {
+ SoftReference<ByteBuffer> ref = directBuffer.get();
+ ByteBuffer buffer = ref == null ? null : ref.get();
+ if (buffer == null || buffer.capacity() < bytes.length) {
+ buffer = ByteBuffer.allocateDirect(bytes.length);
+ directBuffer.set(new SoftReference<ByteBuffer>(buffer));
+ }
+ buffer.clear();
+ buffer.put(bytes);
+ buffer.flip();
+ return new NioByteString(buffer);
+ }
+ };
// 128 - [chars 0x0000 to 0x007f]
- static long ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x007f - 0x0000 + 1;
+ static final long ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x007f - 0x0000 + 1;
// 128
- static long EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT =
- ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS;
+ static final long EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT = ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS;
// 1920 [chars 0x0080 to 0x07FF]
- static long TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x07FF - 0x0080 + 1;
+ static final long TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x07FF - 0x0080 + 1;
// 18,304
- static long EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT =
+ static final long EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT =
// Both bytes are one byte characters
(long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 2) +
// The possible number of two byte characters
TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS;
// 2048
- static long THREE_BYTE_SURROGATES = 2 * 1024;
+ static final long THREE_BYTE_SURROGATES = 2 * 1024;
// 61,440 [chars 0x0800 to 0xFFFF, minus surrogates]
- static long THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS =
+ static final long THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS =
0xFFFF - 0x0800 + 1 - THREE_BYTE_SURROGATES;
// 2,650,112
- static long EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT =
+ static final long EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT =
// All one byte characters
(long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 3) +
// One two byte character and a one byte character
- 2 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS *
- ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +
- // Three byte characters
+ 2 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS * ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +
+ // Three byte characters
THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS;
// 1,048,576 [chars 0x10000L to 0x10FFFF]
- static long FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x10FFFF - 0x10000L + 1;
+ static final long FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x10FFFF - 0x10000L + 1;
// 289,571,839
- static long EXPECTED_FOUR_BYTE_ROUNDTRIPPABLE_COUNT =
+ static final long EXPECTED_FOUR_BYTE_ROUNDTRIPPABLE_COUNT =
// All one byte characters
(long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 4) +
// One and three byte characters
- 2 * THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS *
- ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +
+ 2 * THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS * ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +
// Two two byte characters
TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS +
// Permutations of one and two byte characters
- 3 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS *
- ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS *
- ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +
+ 3 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS * ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS
+ * ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS
+ +
// Four byte characters
FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS;
- static class Shard {
+ static final class Shard {
final long index;
final long start;
final long lim;
@@ -138,7 +183,7 @@ class IsValidUtf8TestUtil {
// 97-111 are all 2342912
for (int i = 97; i <= 111; i++) {
- expected[i] = 2342912;
+ expected[i] = 2342912;
}
// 113-117 are all 1048576
@@ -158,22 +203,18 @@ class IsValidUtf8TestUtil {
return expected;
}
- static final List<Shard> FOUR_BYTE_SHARDS = generateFourByteShards(
- 128, FOUR_BYTE_SHARDS_EXPECTED_ROUNTRIPPABLES);
+ static final List<Shard> FOUR_BYTE_SHARDS =
+ generateFourByteShards(128, FOUR_BYTE_SHARDS_EXPECTED_ROUNTRIPPABLES);
- private static List<Shard> generateFourByteShards(
- int numShards, long[] expected) {
+ private static List<Shard> generateFourByteShards(int numShards, long[] expected) {
assertEquals(numShards, expected.length);
List<Shard> shards = new ArrayList<Shard>(numShards);
long LIM = 1L << 32;
long increment = LIM / numShards;
assertTrue(LIM % numShards == 0);
for (int i = 0; i < numShards; i++) {
- shards.add(new Shard(i,
- increment * i,
- increment * (i + 1),
- expected[i]));
+ shards.add(new Shard(i, increment * i, increment * (i + 1), expected[i]));
}
return shards;
}
@@ -182,12 +223,12 @@ class IsValidUtf8TestUtil {
* Helper to run the loop to test all the permutations for the number of bytes
* specified.
*
+ * @param factory the factory for {@link ByteString} instances.
* @param numBytes the number of bytes in the byte array
* @param expectedCount the expected number of roundtrippable permutations
*/
- static void testBytes(int numBytes, long expectedCount)
- throws UnsupportedEncodingException {
- testBytes(numBytes, expectedCount, 0, -1);
+ static void testBytes(ByteStringFactory factory, int numBytes, long expectedCount) {
+ testBytes(factory, numBytes, expectedCount, 0, -1);
}
/**
@@ -195,14 +236,15 @@ class IsValidUtf8TestUtil {
* specified. This overload is useful for debugging to get the loop to start
* at a certain character.
*
+ * @param factory the factory for {@link ByteString} instances.
* @param numBytes the number of bytes in the byte array
* @param expectedCount the expected number of roundtrippable permutations
* @param start the starting bytes encoded as a long as big-endian
* @param lim the limit of bytes to process encoded as a long as big-endian,
* or -1 to mean the max limit for numBytes
*/
- static void testBytes(int numBytes, long expectedCount, long start, long lim)
- throws UnsupportedEncodingException {
+ static void testBytes(
+ ByteStringFactory factory, int numBytes, long expectedCount, long start, long lim) {
Random rnd = new Random();
byte[] bytes = new byte[numBytes];
@@ -217,7 +259,7 @@ class IsValidUtf8TestUtil {
bytes[bytes.length - i - 1] = (byte) tmpByteChar;
tmpByteChar = tmpByteChar >> 8;
}
- ByteString bs = ByteString.copyFrom(bytes);
+ ByteString bs = factory.newByteString(bytes);
boolean isRoundTrippable = bs.isValidUtf8();
String s = new String(bytes, Internal.UTF_8);
byte[] bytesReencoded = s.getBytes(Internal.UTF_8);
@@ -236,14 +278,15 @@ class IsValidUtf8TestUtil {
int i = rnd.nextInt(numBytes);
int j = rnd.nextInt(numBytes);
if (j < i) {
- int tmp = i; i = j; j = tmp;
+ int tmp = i;
+ i = j;
+ j = tmp;
}
int state1 = Utf8.partialIsValidUtf8(Utf8.COMPLETE, bytes, 0, i);
int state2 = Utf8.partialIsValidUtf8(state1, bytes, i, j);
int state3 = Utf8.partialIsValidUtf8(state2, bytes, j, numBytes);
if (isRoundTrippable != (state3 == Utf8.COMPLETE)) {
- System.out.printf("state=%04x %04x %04x i=%d j=%d%n",
- state1, state2, state3, i, j);
+ System.out.printf("state=%04x %04x %04x i=%d j=%d%n", state1, state2, state3, i, j);
outputFailure(byteChar, bytes, bytesReencoded);
}
assertEquals(isRoundTrippable, (state3 == Utf8.COMPLETE));
@@ -251,36 +294,24 @@ class IsValidUtf8TestUtil {
// Test ropes built out of small partial sequences
ByteString rope = RopeByteString.newInstanceForTest(
bs.substring(0, i),
- RopeByteString.newInstanceForTest(
- bs.substring(i, j),
- bs.substring(j, numBytes)));
+ RopeByteString.newInstanceForTest(bs.substring(i, j), bs.substring(j, numBytes)));
assertSame(RopeByteString.class, rope.getClass());
- ByteString[] byteStrings = { bs, bs.substring(0, numBytes), rope };
+ ByteString[] byteStrings = {bs, bs.substring(0, numBytes), rope};
for (ByteString x : byteStrings) {
- assertEquals(isRoundTrippable,
- x.isValidUtf8());
- assertEquals(state3,
- x.partialIsValidUtf8(Utf8.COMPLETE, 0, numBytes));
-
- assertEquals(state1,
- x.partialIsValidUtf8(Utf8.COMPLETE, 0, i));
- assertEquals(state1,
- x.substring(0, i).partialIsValidUtf8(Utf8.COMPLETE, 0, i));
- assertEquals(state2,
- x.partialIsValidUtf8(state1, i, j - i));
- assertEquals(state2,
- x.substring(i, j).partialIsValidUtf8(state1, 0, j - i));
- assertEquals(state3,
- x.partialIsValidUtf8(state2, j, numBytes - j));
- assertEquals(state3,
- x.substring(j, numBytes)
- .partialIsValidUtf8(state2, 0, numBytes - j));
+ assertEquals(isRoundTrippable, x.isValidUtf8());
+ assertEquals(state3, x.partialIsValidUtf8(Utf8.COMPLETE, 0, numBytes));
+
+ assertEquals(state1, x.partialIsValidUtf8(Utf8.COMPLETE, 0, i));
+ assertEquals(state1, x.substring(0, i).partialIsValidUtf8(Utf8.COMPLETE, 0, i));
+ assertEquals(state2, x.partialIsValidUtf8(state1, i, j - i));
+ assertEquals(state2, x.substring(i, j).partialIsValidUtf8(state1, 0, j - i));
+ assertEquals(state3, x.partialIsValidUtf8(state2, j, numBytes - j));
+ assertEquals(state3, x.substring(j, numBytes).partialIsValidUtf8(state2, 0, numBytes - j));
}
// ByteString reduplication should not affect its UTF-8 validity.
- ByteString ropeADope =
- RopeByteString.newInstanceForTest(bs, bs.substring(0, numBytes));
+ ByteString ropeADope = RopeByteString.newInstanceForTest(bs, bs.substring(0, numBytes));
assertEquals(isRoundTrippable, ropeADope.isValidUtf8());
if (isRoundTrippable) {
@@ -288,8 +319,7 @@ class IsValidUtf8TestUtil {
}
count++;
if (byteChar != 0 && byteChar % 1000000L == 0) {
- logger.info("Processed " + (byteChar / 1000000L) +
- " million characters");
+ logger.info("Processed " + (byteChar / 1000000L) + " million characters");
}
}
logger.info("Round tripped " + countRoundTripped + " of " + count);
@@ -303,25 +333,26 @@ class IsValidUtf8TestUtil {
* actual String class, it's possible for incompatibilities to develop
* (although unlikely).
*
+ * @param factory the factory for {@link ByteString} instances.
* @param numBytes the number of bytes in the byte array
* @param expectedCount the expected number of roundtrippable permutations
* @param start the starting bytes encoded as a long as big-endian
* @param lim the limit of bytes to process encoded as a long as big-endian,
* or -1 to mean the max limit for numBytes
*/
- void testBytesUsingByteBuffers(
- int numBytes, long expectedCount, long start, long lim)
- throws UnsupportedEncodingException {
- CharsetDecoder decoder = Internal.UTF_8.newDecoder()
- .onMalformedInput(CodingErrorAction.REPLACE)
- .onUnmappableCharacter(CodingErrorAction.REPLACE);
- CharsetEncoder encoder = Internal.UTF_8.newEncoder()
- .onMalformedInput(CodingErrorAction.REPLACE)
- .onUnmappableCharacter(CodingErrorAction.REPLACE);
+ static void testBytesUsingByteBuffers(
+ ByteStringFactory factory, int numBytes, long expectedCount, long start, long lim) {
+ CharsetDecoder decoder =
+ Internal.UTF_8.newDecoder()
+ .onMalformedInput(CodingErrorAction.REPLACE)
+ .onUnmappableCharacter(CodingErrorAction.REPLACE);
+ CharsetEncoder encoder =
+ Internal.UTF_8.newEncoder()
+ .onMalformedInput(CodingErrorAction.REPLACE)
+ .onUnmappableCharacter(CodingErrorAction.REPLACE);
byte[] bytes = new byte[numBytes];
int maxChars = (int) (decoder.maxCharsPerByte() * numBytes) + 1;
- char[] charsDecoded =
- new char[(int) (decoder.maxCharsPerByte() * numBytes) + 1];
+ char[] charsDecoded = new char[(int) (decoder.maxCharsPerByte() * numBytes) + 1];
int maxBytes = (int) (encoder.maxBytesPerChar() * maxChars) + 1;
byte[] bytesReencoded = new byte[maxBytes];
@@ -347,7 +378,7 @@ class IsValidUtf8TestUtil {
bytes[bytes.length - i - 1] = (byte) tmpByteChar;
tmpByteChar = tmpByteChar >> 8;
}
- boolean isRoundTrippable = ByteString.copyFrom(bytes).isValidUtf8();
+ boolean isRoundTrippable = factory.newByteString(bytes).isValidUtf8();
CoderResult result = decoder.decode(bb, cb, true);
assertFalse(result.isError());
result = decoder.flush(cb);
@@ -382,8 +413,7 @@ class IsValidUtf8TestUtil {
countRoundTripped++;
}
if (byteChar != 0 && byteChar % 1000000 == 0) {
- logger.info("Processed " + (byteChar / 1000000) +
- " million characters");
+ logger.info("Processed " + (byteChar / 1000000) + " million characters");
}
}
logger.info("Round tripped " + countRoundTripped + " of " + count);
@@ -394,10 +424,9 @@ class IsValidUtf8TestUtil {
outputFailure(byteChar, bytes, after, after.length);
}
- private static void outputFailure(long byteChar, byte[] bytes, byte[] after,
- int len) {
- fail("Failure: (" + Long.toHexString(byteChar) + ") " +
- toHexString(bytes) + " => " + toHexString(after, len));
+ private static void outputFailure(long byteChar, byte[] bytes, byte[] after, int len) {
+ fail("Failure: (" + Long.toHexString(byteChar) + ") " + toHexString(bytes) + " => "
+ + toHexString(after, len));
}
private static String toHexString(byte[] b) {
@@ -416,5 +445,4 @@ class IsValidUtf8TestUtil {
s.append("\"");
return s.toString();
}
-
}