diff options
Diffstat (limited to 'java/core/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java')
-rw-r--r-- | java/core/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java | 202 |
1 files changed, 115 insertions, 87 deletions
diff --git a/java/core/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java b/java/core/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java index 321669f3..16a808bf 100644 --- a/java/core/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java +++ b/java/core/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java @@ -30,9 +30,13 @@ package com.google.protobuf; -import static junit.framework.Assert.*; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertSame; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; -import java.io.UnsupportedEncodingException; +import java.lang.ref.SoftReference; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.CharsetDecoder; @@ -52,64 +56,105 @@ import java.util.logging.Logger; * @author jonp@google.com (Jon Perlow) * @author martinrb@google.com (Martin Buchholz) */ -class IsValidUtf8TestUtil { - private static Logger logger = Logger.getLogger( - IsValidUtf8TestUtil.class.getName()); +final class IsValidUtf8TestUtil { + private static Logger logger = Logger.getLogger(IsValidUtf8TestUtil.class.getName()); + + private IsValidUtf8TestUtil() {} + + static interface ByteStringFactory { + ByteString newByteString(byte[] bytes); + } + + static final ByteStringFactory LITERAL_FACTORY = new ByteStringFactory() { + @Override + public ByteString newByteString(byte[] bytes) { + return ByteString.wrap(bytes); + } + }; + + static final ByteStringFactory HEAP_NIO_FACTORY = new ByteStringFactory() { + @Override + public ByteString newByteString(byte[] bytes) { + return new NioByteString(ByteBuffer.wrap(bytes)); + } + }; + + private static ThreadLocal<SoftReference<ByteBuffer>> directBuffer = + new ThreadLocal<SoftReference<ByteBuffer>>(); + + /** + * Factory for direct {@link ByteBuffer} instances. To reduce direct memory usage, this + * uses a thread local direct buffer. This means that each call will overwrite the buffer's + * contents from the previous call, so the calling code must be careful not to continue using + * a buffer returned from a previous invocation. + */ + static final ByteStringFactory DIRECT_NIO_FACTORY = new ByteStringFactory() { + @Override + public ByteString newByteString(byte[] bytes) { + SoftReference<ByteBuffer> ref = directBuffer.get(); + ByteBuffer buffer = ref == null ? null : ref.get(); + if (buffer == null || buffer.capacity() < bytes.length) { + buffer = ByteBuffer.allocateDirect(bytes.length); + directBuffer.set(new SoftReference<ByteBuffer>(buffer)); + } + buffer.clear(); + buffer.put(bytes); + buffer.flip(); + return new NioByteString(buffer); + } + }; // 128 - [chars 0x0000 to 0x007f] - static long ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x007f - 0x0000 + 1; + static final long ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x007f - 0x0000 + 1; // 128 - static long EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT = - ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS; + static final long EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT = ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS; // 1920 [chars 0x0080 to 0x07FF] - static long TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x07FF - 0x0080 + 1; + static final long TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x07FF - 0x0080 + 1; // 18,304 - static long EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT = + static final long EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT = // Both bytes are one byte characters (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 2) + // The possible number of two byte characters TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS; // 2048 - static long THREE_BYTE_SURROGATES = 2 * 1024; + static final long THREE_BYTE_SURROGATES = 2 * 1024; // 61,440 [chars 0x0800 to 0xFFFF, minus surrogates] - static long THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS = + static final long THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0xFFFF - 0x0800 + 1 - THREE_BYTE_SURROGATES; // 2,650,112 - static long EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT = + static final long EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT = // All one byte characters (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 3) + // One two byte character and a one byte character - 2 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS * - ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS + - // Three byte characters + 2 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS * ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS + + // Three byte characters THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS; // 1,048,576 [chars 0x10000L to 0x10FFFF] - static long FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x10FFFF - 0x10000L + 1; + static final long FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x10FFFF - 0x10000L + 1; // 289,571,839 - static long EXPECTED_FOUR_BYTE_ROUNDTRIPPABLE_COUNT = + static final long EXPECTED_FOUR_BYTE_ROUNDTRIPPABLE_COUNT = // All one byte characters (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 4) + // One and three byte characters - 2 * THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS * - ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS + + 2 * THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS * ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS + // Two two byte characters TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS + // Permutations of one and two byte characters - 3 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS * - ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS * - ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS + + 3 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS * ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS + * ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS + + // Four byte characters FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS; - static class Shard { + static final class Shard { final long index; final long start; final long lim; @@ -138,7 +183,7 @@ class IsValidUtf8TestUtil { // 97-111 are all 2342912 for (int i = 97; i <= 111; i++) { - expected[i] = 2342912; + expected[i] = 2342912; } // 113-117 are all 1048576 @@ -158,22 +203,18 @@ class IsValidUtf8TestUtil { return expected; } - static final List<Shard> FOUR_BYTE_SHARDS = generateFourByteShards( - 128, FOUR_BYTE_SHARDS_EXPECTED_ROUNTRIPPABLES); + static final List<Shard> FOUR_BYTE_SHARDS = + generateFourByteShards(128, FOUR_BYTE_SHARDS_EXPECTED_ROUNTRIPPABLES); - private static List<Shard> generateFourByteShards( - int numShards, long[] expected) { + private static List<Shard> generateFourByteShards(int numShards, long[] expected) { assertEquals(numShards, expected.length); List<Shard> shards = new ArrayList<Shard>(numShards); long LIM = 1L << 32; long increment = LIM / numShards; assertTrue(LIM % numShards == 0); for (int i = 0; i < numShards; i++) { - shards.add(new Shard(i, - increment * i, - increment * (i + 1), - expected[i])); + shards.add(new Shard(i, increment * i, increment * (i + 1), expected[i])); } return shards; } @@ -182,12 +223,12 @@ class IsValidUtf8TestUtil { * Helper to run the loop to test all the permutations for the number of bytes * specified. * + * @param factory the factory for {@link ByteString} instances. * @param numBytes the number of bytes in the byte array * @param expectedCount the expected number of roundtrippable permutations */ - static void testBytes(int numBytes, long expectedCount) - throws UnsupportedEncodingException { - testBytes(numBytes, expectedCount, 0, -1); + static void testBytes(ByteStringFactory factory, int numBytes, long expectedCount) { + testBytes(factory, numBytes, expectedCount, 0, -1); } /** @@ -195,14 +236,15 @@ class IsValidUtf8TestUtil { * specified. This overload is useful for debugging to get the loop to start * at a certain character. * + * @param factory the factory for {@link ByteString} instances. * @param numBytes the number of bytes in the byte array * @param expectedCount the expected number of roundtrippable permutations * @param start the starting bytes encoded as a long as big-endian * @param lim the limit of bytes to process encoded as a long as big-endian, * or -1 to mean the max limit for numBytes */ - static void testBytes(int numBytes, long expectedCount, long start, long lim) - throws UnsupportedEncodingException { + static void testBytes( + ByteStringFactory factory, int numBytes, long expectedCount, long start, long lim) { Random rnd = new Random(); byte[] bytes = new byte[numBytes]; @@ -217,7 +259,7 @@ class IsValidUtf8TestUtil { bytes[bytes.length - i - 1] = (byte) tmpByteChar; tmpByteChar = tmpByteChar >> 8; } - ByteString bs = ByteString.copyFrom(bytes); + ByteString bs = factory.newByteString(bytes); boolean isRoundTrippable = bs.isValidUtf8(); String s = new String(bytes, Internal.UTF_8); byte[] bytesReencoded = s.getBytes(Internal.UTF_8); @@ -236,14 +278,15 @@ class IsValidUtf8TestUtil { int i = rnd.nextInt(numBytes); int j = rnd.nextInt(numBytes); if (j < i) { - int tmp = i; i = j; j = tmp; + int tmp = i; + i = j; + j = tmp; } int state1 = Utf8.partialIsValidUtf8(Utf8.COMPLETE, bytes, 0, i); int state2 = Utf8.partialIsValidUtf8(state1, bytes, i, j); int state3 = Utf8.partialIsValidUtf8(state2, bytes, j, numBytes); if (isRoundTrippable != (state3 == Utf8.COMPLETE)) { - System.out.printf("state=%04x %04x %04x i=%d j=%d%n", - state1, state2, state3, i, j); + System.out.printf("state=%04x %04x %04x i=%d j=%d%n", state1, state2, state3, i, j); outputFailure(byteChar, bytes, bytesReencoded); } assertEquals(isRoundTrippable, (state3 == Utf8.COMPLETE)); @@ -251,36 +294,24 @@ class IsValidUtf8TestUtil { // Test ropes built out of small partial sequences ByteString rope = RopeByteString.newInstanceForTest( bs.substring(0, i), - RopeByteString.newInstanceForTest( - bs.substring(i, j), - bs.substring(j, numBytes))); + RopeByteString.newInstanceForTest(bs.substring(i, j), bs.substring(j, numBytes))); assertSame(RopeByteString.class, rope.getClass()); - ByteString[] byteStrings = { bs, bs.substring(0, numBytes), rope }; + ByteString[] byteStrings = {bs, bs.substring(0, numBytes), rope}; for (ByteString x : byteStrings) { - assertEquals(isRoundTrippable, - x.isValidUtf8()); - assertEquals(state3, - x.partialIsValidUtf8(Utf8.COMPLETE, 0, numBytes)); - - assertEquals(state1, - x.partialIsValidUtf8(Utf8.COMPLETE, 0, i)); - assertEquals(state1, - x.substring(0, i).partialIsValidUtf8(Utf8.COMPLETE, 0, i)); - assertEquals(state2, - x.partialIsValidUtf8(state1, i, j - i)); - assertEquals(state2, - x.substring(i, j).partialIsValidUtf8(state1, 0, j - i)); - assertEquals(state3, - x.partialIsValidUtf8(state2, j, numBytes - j)); - assertEquals(state3, - x.substring(j, numBytes) - .partialIsValidUtf8(state2, 0, numBytes - j)); + assertEquals(isRoundTrippable, x.isValidUtf8()); + assertEquals(state3, x.partialIsValidUtf8(Utf8.COMPLETE, 0, numBytes)); + + assertEquals(state1, x.partialIsValidUtf8(Utf8.COMPLETE, 0, i)); + assertEquals(state1, x.substring(0, i).partialIsValidUtf8(Utf8.COMPLETE, 0, i)); + assertEquals(state2, x.partialIsValidUtf8(state1, i, j - i)); + assertEquals(state2, x.substring(i, j).partialIsValidUtf8(state1, 0, j - i)); + assertEquals(state3, x.partialIsValidUtf8(state2, j, numBytes - j)); + assertEquals(state3, x.substring(j, numBytes).partialIsValidUtf8(state2, 0, numBytes - j)); } // ByteString reduplication should not affect its UTF-8 validity. - ByteString ropeADope = - RopeByteString.newInstanceForTest(bs, bs.substring(0, numBytes)); + ByteString ropeADope = RopeByteString.newInstanceForTest(bs, bs.substring(0, numBytes)); assertEquals(isRoundTrippable, ropeADope.isValidUtf8()); if (isRoundTrippable) { @@ -288,8 +319,7 @@ class IsValidUtf8TestUtil { } count++; if (byteChar != 0 && byteChar % 1000000L == 0) { - logger.info("Processed " + (byteChar / 1000000L) + - " million characters"); + logger.info("Processed " + (byteChar / 1000000L) + " million characters"); } } logger.info("Round tripped " + countRoundTripped + " of " + count); @@ -303,25 +333,26 @@ class IsValidUtf8TestUtil { * actual String class, it's possible for incompatibilities to develop * (although unlikely). * + * @param factory the factory for {@link ByteString} instances. * @param numBytes the number of bytes in the byte array * @param expectedCount the expected number of roundtrippable permutations * @param start the starting bytes encoded as a long as big-endian * @param lim the limit of bytes to process encoded as a long as big-endian, * or -1 to mean the max limit for numBytes */ - void testBytesUsingByteBuffers( - int numBytes, long expectedCount, long start, long lim) - throws UnsupportedEncodingException { - CharsetDecoder decoder = Internal.UTF_8.newDecoder() - .onMalformedInput(CodingErrorAction.REPLACE) - .onUnmappableCharacter(CodingErrorAction.REPLACE); - CharsetEncoder encoder = Internal.UTF_8.newEncoder() - .onMalformedInput(CodingErrorAction.REPLACE) - .onUnmappableCharacter(CodingErrorAction.REPLACE); + static void testBytesUsingByteBuffers( + ByteStringFactory factory, int numBytes, long expectedCount, long start, long lim) { + CharsetDecoder decoder = + Internal.UTF_8.newDecoder() + .onMalformedInput(CodingErrorAction.REPLACE) + .onUnmappableCharacter(CodingErrorAction.REPLACE); + CharsetEncoder encoder = + Internal.UTF_8.newEncoder() + .onMalformedInput(CodingErrorAction.REPLACE) + .onUnmappableCharacter(CodingErrorAction.REPLACE); byte[] bytes = new byte[numBytes]; int maxChars = (int) (decoder.maxCharsPerByte() * numBytes) + 1; - char[] charsDecoded = - new char[(int) (decoder.maxCharsPerByte() * numBytes) + 1]; + char[] charsDecoded = new char[(int) (decoder.maxCharsPerByte() * numBytes) + 1]; int maxBytes = (int) (encoder.maxBytesPerChar() * maxChars) + 1; byte[] bytesReencoded = new byte[maxBytes]; @@ -347,7 +378,7 @@ class IsValidUtf8TestUtil { bytes[bytes.length - i - 1] = (byte) tmpByteChar; tmpByteChar = tmpByteChar >> 8; } - boolean isRoundTrippable = ByteString.copyFrom(bytes).isValidUtf8(); + boolean isRoundTrippable = factory.newByteString(bytes).isValidUtf8(); CoderResult result = decoder.decode(bb, cb, true); assertFalse(result.isError()); result = decoder.flush(cb); @@ -382,8 +413,7 @@ class IsValidUtf8TestUtil { countRoundTripped++; } if (byteChar != 0 && byteChar % 1000000 == 0) { - logger.info("Processed " + (byteChar / 1000000) + - " million characters"); + logger.info("Processed " + (byteChar / 1000000) + " million characters"); } } logger.info("Round tripped " + countRoundTripped + " of " + count); @@ -394,10 +424,9 @@ class IsValidUtf8TestUtil { outputFailure(byteChar, bytes, after, after.length); } - private static void outputFailure(long byteChar, byte[] bytes, byte[] after, - int len) { - fail("Failure: (" + Long.toHexString(byteChar) + ") " + - toHexString(bytes) + " => " + toHexString(after, len)); + private static void outputFailure(long byteChar, byte[] bytes, byte[] after, int len) { + fail("Failure: (" + Long.toHexString(byteChar) + ") " + toHexString(bytes) + " => " + + toHexString(after, len)); } private static String toHexString(byte[] b) { @@ -416,5 +445,4 @@ class IsValidUtf8TestUtil { s.append("\""); return s.toString(); } - } |