diff options
Diffstat (limited to 'java/core/src/main/java')
25 files changed, 3651 insertions, 1171 deletions
diff --git a/java/core/src/main/java/com/google/protobuf/ByteBufferWriter.java b/java/core/src/main/java/com/google/protobuf/ByteBufferWriter.java new file mode 100644 index 00000000..0cc38175 --- /dev/null +++ b/java/core/src/main/java/com/google/protobuf/ByteBufferWriter.java @@ -0,0 +1,145 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +package com.google.protobuf; + +import static java.lang.Math.max; +import static java.lang.Math.min; + +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.lang.ref.SoftReference; +import java.nio.ByteBuffer; + +/** + * Utility class to provide efficient writing of {@link ByteBuffer}s to {@link OutputStream}s. + */ +final class ByteBufferWriter { + private ByteBufferWriter() {} + + /** + * Minimum size for a cached buffer. This prevents us from allocating buffers that are too + * small to be easily reused. + */ + // TODO(nathanmittler): tune this property or allow configuration? + private static final int MIN_CACHED_BUFFER_SIZE = 1024; + + /** + * Maximum size for a cached buffer. If a larger buffer is required, it will be allocated + * but not cached. + */ + // TODO(nathanmittler): tune this property or allow configuration? + private static final int MAX_CACHED_BUFFER_SIZE = 16 * 1024; + + /** + * The fraction of the requested buffer size under which the buffer will be reallocated. + */ + // TODO(nathanmittler): tune this property or allow configuration? + private static final float BUFFER_REALLOCATION_THRESHOLD = 0.5f; + + /** + * Keeping a soft reference to a thread-local buffer. This buffer is used for writing a + * {@link ByteBuffer} to an {@link OutputStream} when no zero-copy alternative was available. + * Using a "soft" reference since VMs may keep this reference around longer than "weak" + * (e.g. HotSpot will maintain soft references until memory pressure warrants collection). + */ + private static final ThreadLocal<SoftReference<byte[]>> BUFFER = + new ThreadLocal<SoftReference<byte[]>>(); + + /** + * For testing purposes only. Clears the cached buffer to force a new allocation on the next + * invocation. + */ + static void clearCachedBuffer() { + BUFFER.set(null); + } + + /** + * Writes the remaining content of the buffer to the given stream. 
The buffer {@code position} + * will remain unchanged by this method. + */ + static void write(ByteBuffer buffer, OutputStream output) throws IOException { + final int initialPos = buffer.position(); + try { + if (buffer.hasArray()) { + // Optimized write for array-backed buffers. + // Note that we're taking the risk that a malicious OutputStream could modify the array. + output.write(buffer.array(), buffer.arrayOffset() + buffer.position(), buffer.remaining()); + } else if (output instanceof FileOutputStream) { + // Use a channel to write out the ByteBuffer. This will automatically empty the buffer. + ((FileOutputStream) output).getChannel().write(buffer); + } else { + // Read all of the data from the buffer to an array. + // TODO(nathanmittler): Consider performance improvements for other "known" stream types. + final byte[] array = getOrCreateBuffer(buffer.remaining()); + while (buffer.hasRemaining()) { + int length = min(buffer.remaining(), array.length); + buffer.get(array, 0, length); + output.write(array, 0, length); + } + } + } finally { + // Restore the initial position. + buffer.position(initialPos); + } + } + + private static byte[] getOrCreateBuffer(int requestedSize) { + requestedSize = max(requestedSize, MIN_CACHED_BUFFER_SIZE); + + byte[] buffer = getBuffer(); + // Only allocate if we need to. + if (buffer == null || needToReallocate(requestedSize, buffer.length)) { + buffer = new byte[requestedSize]; + + // Only cache the buffer if it's not too big. + if (requestedSize <= MAX_CACHED_BUFFER_SIZE) { + setBuffer(buffer); + } + } + return buffer; + } + + private static boolean needToReallocate(int requestedSize, int bufferLength) { + // First check against just the requested length to avoid the multiply. + return bufferLength < requestedSize + && bufferLength < requestedSize * BUFFER_REALLOCATION_THRESHOLD; + } + + private static byte[] getBuffer() { + SoftReference<byte[]> sr = BUFFER.get(); + return sr == null ? 
null : sr.get(); + } + + private static void setBuffer(byte[] value) { + BUFFER.set(new SoftReference<byte[]>(value)); + } +} diff --git a/java/core/src/main/java/com/google/protobuf/ByteOutput.java b/java/core/src/main/java/com/google/protobuf/ByteOutput.java new file mode 100644 index 00000000..8b7b04c8 --- /dev/null +++ b/java/core/src/main/java/com/google/protobuf/ByteOutput.java @@ -0,0 +1,116 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package com.google.protobuf; + +import java.io.IOException; +import java.nio.ByteBuffer; + +/** + * An output target for raw bytes. This interface provides semantics that support two types of + * writing: + * + * <p/><b>Traditional write operations:</b> + * (as defined by {@link java.io.OutputStream}) where the target method is responsible for either + * copying the data or completing the write before returning from the method call. + * + * <p/><b>Lazy write operations:</b> where the caller guarantees that it will never modify the + * provided buffer and it can therefore be considered immutable. The target method is free to + * maintain a reference to the buffer beyond the scope of the method call (e.g. until the write + * operation completes). + */ +@ExperimentalApi +public abstract class ByteOutput { + /** + * Writes a single byte. + * + * @param value the byte to be written + * @throws IOException thrown if an error occurred while writing + */ + public abstract void write(byte value) throws IOException; + + /** + * Writes a sequence of bytes. The {@link ByteOutput} must copy {@code value} if it will + * not be processed prior to the return of this method call, since {@code value} may be + * reused/altered by the caller. + * + * <p>NOTE: This method <strong>MUST NOT</strong> modify the {@code value}. Doing so is a + * programming error and will lead to data corruption which will be difficult to debug. 
+ * + * @param value the bytes to be written + * @param offset the offset of the start of the writable range + * @param length the number of bytes to write starting from {@code offset} + * @throws IOException thrown if an error occurred while writing + */ + public abstract void write(byte[] value, int offset, int length) throws IOException; + + /** + * Writes a sequence of bytes. The {@link ByteOutput} is free to retain a reference to the value + * beyond the scope of this method call (e.g. write later) since it is considered immutable and is + * guaranteed not to change by the caller. + * + * <p>NOTE: This method <strong>MUST NOT</strong> modify the {@code value}. Doing so is a + * programming error and will lead to data corruption which will be difficult to debug. + * + * @param value the bytes to be written + * @param offset the offset of the start of the writable range + * @param length the number of bytes to write starting from {@code offset} + * @throws IOException thrown if an error occurred while writing + */ + public abstract void writeLazy(byte[] value, int offset, int length) throws IOException; + + /** + * Writes a sequence of bytes. The {@link ByteOutput} must copy {@code value} if it will + * not be processed prior to the return of this method call, since {@code value} may be + * reused/altered by the caller. + * + * <p>NOTE: This method <strong>MUST NOT</strong> modify the {@code value}. Doing so is a + * programming error and will lead to data corruption which will be difficult to debug. + * + * @param value the bytes to be written. Upon returning from this call, the {@code position} of + * this buffer will be set to the {@code limit} + * @throws IOException thrown if an error occurred while writing + */ + public abstract void write(ByteBuffer value) throws IOException; + + /** + * Writes a sequence of bytes. The {@link ByteOutput} is free to retain a reference to the value + * beyond the scope of this method call (e.g. 
write later) since it is considered immutable and is + * guaranteed not to change by the caller. + * + * <p>NOTE: This method <strong>MUST NOT</strong> modify the {@code value}. Doing so is a + * programming error and will lead to data corruption which will be difficult to debug. + * + * @param value the bytes to be written. Upon returning from this call, the {@code position} of + * this buffer will be set to the {@code limit} + * @throws IOException thrown if an error occurred while writing + */ + public abstract void writeLazy(ByteBuffer value) throws IOException; +} diff --git a/java/core/src/main/java/com/google/protobuf/ByteString.java b/java/core/src/main/java/com/google/protobuf/ByteString.java index 305236f3..62c94508 100644 --- a/java/core/src/main/java/com/google/protobuf/ByteString.java +++ b/java/core/src/main/java/com/google/protobuf/ByteString.java @@ -1,4 +1,32 @@ -// Copyright 2007 Google Inc. All rights reserved. +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. package com.google.protobuf; @@ -15,6 +43,7 @@ import java.nio.ByteBuffer; import java.nio.charset.Charset; import java.nio.charset.UnsupportedCharsetException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Iterator; @@ -58,6 +87,54 @@ public abstract class ByteString implements Iterable<Byte>, Serializable { * Empty {@code ByteString}. */ public static final ByteString EMPTY = new LiteralByteString(Internal.EMPTY_BYTE_ARRAY); + + /** + * An interface to efficiently copy {@code byte[]}. + * + * <p>One of the noticeable costs of copying a byte[] into a new array using + * {@code System.arraycopy} is nullification of a new buffer before the copy. It has been shown + * the Hotspot VM is capable of intrinsifying the {@code Arrays.copyOfRange} operation to avoid this + * expensive nullification and provide substantial performance gain. Unfortunately this does not + * hold on Android runtimes and could make the copy slightly slower due to additional code in + * the {@code Arrays.copyOfRange}. Thus we provide two different implementations of the array copier + * for Hotspot and Android runtimes. 
+ */ + private interface ByteArrayCopier { + /** + * Copies the specified range of the specified array into a new array + */ + byte[] copyFrom(byte[] bytes, int offset, int size); + } + + /** Implementation of {@code ByteArrayCopier} which uses {@link System#arraycopy}. */ + private static final class SystemByteArrayCopier implements ByteArrayCopier { + @Override + public byte[] copyFrom(byte[] bytes, int offset, int size) { + byte[] copy = new byte[size]; + System.arraycopy(bytes, offset, copy, 0, size); + return copy; + } + } + + /** Implementation of {@code ByteArrayCopier} which uses {@link Arrays#copyOfRange}. */ + private static final class ArraysByteArrayCopier implements ByteArrayCopier { + @Override + public byte[] copyFrom(byte[] bytes, int offset, int size) { + return Arrays.copyOfRange(bytes, offset, offset + size); + } + } + + private static final ByteArrayCopier byteArrayCopier; + static { + boolean isAndroid = true; + try { + Class.forName("android.content.Context"); + } catch (ClassNotFoundException e) { + isAndroid = false; + } + + byteArrayCopier = isAndroid ? new SystemByteArrayCopier() : new ArraysByteArrayCopier(); + } /** * Cached hash value. 
Intentionally accessed via a data race, which @@ -77,7 +154,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable { * * @param index index of byte * @return the value - * @throws ArrayIndexOutOfBoundsException {@code index < 0 or index >= size} + * @throws IndexOutOfBoundsException {@code index < 0 or index >= size} */ public abstract byte byteAt(int index); @@ -109,7 +186,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable { public byte nextByte() { try { return byteAt(position++); - } catch (ArrayIndexOutOfBoundsException e) { + } catch (IndexOutOfBoundsException e) { throw new NoSuchElementException(e.getMessage()); } } @@ -220,9 +297,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable { * @return new {@code ByteString} */ public static ByteString copyFrom(byte[] bytes, int offset, int size) { - byte[] copy = new byte[size]; - System.arraycopy(bytes, offset, copy, 0, size); - return new LiteralByteString(copy); + return new LiteralByteString(byteArrayCopier.copyFrom(bytes, offset, size)); } /** @@ -559,12 +634,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable { } /** - * Writes the complete contents of this byte string to - * the specified output stream argument. - * - * <p>It is assumed that the {@link OutputStream} will not modify the contents passed it - * it. It may be possible for a malicious {@link OutputStream} to corrupt - * the data underlying the {@link ByteString}. + * Writes a copy of the contents of this byte string to the specified output stream argument. * * @param out the output stream to which to write the data. * @throws IOException if an I/O error occurs. @@ -578,8 +648,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable { * @param sourceOffset offset within these bytes * @param numberToWrite number of bytes to write * @throws IOException if an I/O error occurs. 
- * @throws IndexOutOfBoundsException if an offset or size is negative or too - * large + * @throws IndexOutOfBoundsException if an offset or size is negative or too large */ final void writeTo(OutputStream out, int sourceOffset, int numberToWrite) throws IOException { @@ -597,6 +666,20 @@ public abstract class ByteString implements Iterable<Byte>, Serializable { throws IOException; /** + * Writes this {@link ByteString} to the provided {@link ByteOutput}. Calling + * this method may result in multiple operations on the target {@link ByteOutput}. + * + * <p>This method may expose internal backing buffers of the {@link ByteString} to the {@link + * ByteOutput} in order to avoid additional copying overhead. It would be possible for a malicious + * {@link ByteOutput} to corrupt the {@link ByteString}. Use with caution! + * + * @param byteOutput the output target to receive the bytes + * @throws IOException if an I/O error occurs + * @see UnsafeByteOperations#unsafeWriteTo(ByteString, ByteOutput) + */ + abstract void writeTo(ByteOutput byteOutput) throws IOException; + + /** * Constructs a read-only {@code java.nio.ByteBuffer} whose content * is equal to the contents of this byte string. * The result uses the same backing array as the byte string, if possible. @@ -1102,7 +1185,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable { * * @param index the index position to be tested * @param size the length of the array - * @throws ArrayIndexOutOfBoundsException if the index does not fall within the array. + * @throws IndexOutOfBoundsException if the index does not fall within the array. */ static void checkIndex(int index, int size) { if ((index | (size - (index + 1))) < 0) { @@ -1120,7 +1203,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable { * @param endIndex the end index of the range (exclusive) * @param size the size of the array. * @return the length of the range. 
- * @throws ArrayIndexOutOfBoundsException some or all of the range falls outside of the array. + * @throws IndexOutOfBoundsException some or all of the range falls outside of the array. */ static int checkRange(int startIndex, int endIndex, int size) { final int length = endIndex - startIndex; @@ -1236,6 +1319,11 @@ public abstract class ByteString implements Iterable<Byte>, Serializable { } @Override + final void writeTo(ByteOutput output) throws IOException { + output.writeLazy(bytes, getOffsetIntoBytes(), size()); + } + + @Override protected final String toStringInternal(Charset charset) { return new String(bytes, getOffsetIntoBytes(), size(), charset); } diff --git a/java/core/src/main/java/com/google/protobuf/CodedInputStream.java b/java/core/src/main/java/com/google/protobuf/CodedInputStream.java index b3118ee0..e8860651 100644 --- a/java/core/src/main/java/com/google/protobuf/CodedInputStream.java +++ b/java/core/src/main/java/com/google/protobuf/CodedInputStream.java @@ -55,7 +55,14 @@ public final class CodedInputStream { * Create a new CodedInputStream wrapping the given InputStream. */ public static CodedInputStream newInstance(final InputStream input) { - return new CodedInputStream(input); + return new CodedInputStream(input, BUFFER_SIZE); + } + + /** + * Create a new CodedInputStream wrapping the given InputStream. + */ + static CodedInputStream newInstance(final InputStream input, int bufferSize) { + return new CodedInputStream(input, bufferSize); } /** @@ -70,14 +77,14 @@ public final class CodedInputStream { */ public static CodedInputStream newInstance(final byte[] buf, final int off, final int len) { - return newInstance(buf, off, len, false); + return newInstance(buf, off, len, false /* bufferIsImmutable */); } - + /** * Create a new CodedInputStream wrapping the given byte array slice. 
*/ - public static CodedInputStream newInstance(final byte[] buf, final int off, - final int len, boolean bufferIsImmutable) { + static CodedInputStream newInstance( + final byte[] buf, final int off, final int len, final boolean bufferIsImmutable) { CodedInputStream result = new CodedInputStream(buf, off, len, bufferIsImmutable); try { // Some uses of CodedInputStream can be more efficient if they know @@ -361,6 +368,11 @@ public final class CodedInputStream { return result; } else if (size == 0) { return ""; + } else if (size <= bufferSize) { + refillBuffer(size); + String result = new String(buffer, bufferPos, size, Internal.UTF_8); + bufferPos += size; + return result; } else { // Slow path: Build a byte array first then copy it. return new String(readRawBytesSlowPath(size), Internal.UTF_8); @@ -375,14 +387,21 @@ public final class CodedInputStream { public String readStringRequireUtf8() throws IOException { final int size = readRawVarint32(); final byte[] bytes; - int pos = bufferPos; - if (size <= (bufferSize - pos) && size > 0) { + final int oldPos = bufferPos; + final int pos; + if (size <= (bufferSize - oldPos) && size > 0) { // Fast path: We already have the bytes in a contiguous buffer, so // just copy directly from it. bytes = buffer; - bufferPos = pos + size; + bufferPos = oldPos + size; + pos = oldPos; } else if (size == 0) { return ""; + } else if (size <= bufferSize) { + refillBuffer(size); + bytes = buffer; + pos = 0; + bufferPos = pos + size; } else { // Slow path: Build a byte array first then copy it. 
bytes = readRawBytesSlowPath(size); @@ -869,7 +888,8 @@ public final class CodedInputStream { private static final int DEFAULT_SIZE_LIMIT = 64 << 20; // 64MB private static final int BUFFER_SIZE = 4096; - private CodedInputStream(final byte[] buffer, final int off, final int len, boolean bufferIsImmutable) { + private CodedInputStream( + final byte[] buffer, final int off, final int len, boolean bufferIsImmutable) { this.buffer = buffer; bufferSize = off + len; bufferPos = off; @@ -878,8 +898,8 @@ public final class CodedInputStream { this.bufferIsImmutable = bufferIsImmutable; } - private CodedInputStream(final InputStream input) { - buffer = new byte[BUFFER_SIZE]; + private CodedInputStream(final InputStream input, int bufferSize) { + buffer = new byte[bufferSize]; bufferSize = 0; bufferPos = 0; totalBytesRetired = 0; diff --git a/java/core/src/main/java/com/google/protobuf/CodedOutputStream.java b/java/core/src/main/java/com/google/protobuf/CodedOutputStream.java index d8ebad21..b92394b8 100644 --- a/java/core/src/main/java/com/google/protobuf/CodedOutputStream.java +++ b/java/core/src/main/java/com/google/protobuf/CodedOutputStream.java @@ -49,13 +49,17 @@ import java.util.logging.Logger; * you are writing some other format of your own design, use the latter. * * <p>This class is totally unsynchronized. - * - * @author kneton@google.com Kenton Varda */ public final class CodedOutputStream { - private static final Logger logger = Logger.getLogger(CodedOutputStream.class.getName()); + private static final int LITTLE_ENDIAN_64_SIZE = 8; + + /** + * @deprecated Use {@link #computeFixed32SizeNoTag(int)} instead. + */ + @Deprecated public static final int LITTLE_ENDIAN_32_SIZE = 4; + // TODO(dweis): Consider migrating to a ByteBuffer. private final byte[] buffer; private final int limit; @@ -77,12 +81,13 @@ public final class CodedOutputStream { * CodedOutputStream. 
*/ static int computePreferredBufferSize(int dataLength) { - if (dataLength > DEFAULT_BUFFER_SIZE) return DEFAULT_BUFFER_SIZE; + if (dataLength > DEFAULT_BUFFER_SIZE) { + return DEFAULT_BUFFER_SIZE; + } return dataLength; } - private CodedOutputStream(final byte[] buffer, final int offset, - final int length) { + private CodedOutputStream(final byte[] buffer, final int offset, final int length) { output = null; this.buffer = buffer; position = offset; @@ -108,8 +113,7 @@ public final class CodedOutputStream { * Create a new {@code CodedOutputStream} wrapping the given * {@code OutputStream} with a given buffer size. */ - public static CodedOutputStream newInstance(final OutputStream output, - final int bufferSize) { + public static CodedOutputStream newInstance(final OutputStream output, final int bufferSize) { return new CodedOutputStream(output, new byte[bufferSize]); } @@ -131,9 +135,8 @@ public final class CodedOutputStream { * array is faster than writing to an {@code OutputStream}. See also * {@link ByteString#newCodedBuilder}. */ - public static CodedOutputStream newInstance(final byte[] flatArray, - final int offset, - final int length) { + public static CodedOutputStream newInstance( + final byte[] flatArray, final int offset, final int length) { return new CodedOutputStream(flatArray, offset, length); } @@ -147,13 +150,13 @@ public final class CodedOutputStream { /** * Create a new {@code CodedOutputStream} that writes to the given ByteBuffer. 
*/ - public static CodedOutputStream newInstance(ByteBuffer byteBuffer, - int bufferSize) { + public static CodedOutputStream newInstance(ByteBuffer byteBuffer, int bufferSize) { return newInstance(new ByteBufferOutputStream(byteBuffer), bufferSize); } private static class ByteBufferOutputStream extends OutputStream { private final ByteBuffer byteBuffer; + public ByteBufferOutputStream(ByteBuffer byteBuffer) { this.byteBuffer = byteBuffer; } @@ -171,106 +174,120 @@ public final class CodedOutputStream { // ----------------------------------------------------------------- - /** Write a {@code double} field, including tag, to the stream. */ - public void writeDouble(final int fieldNumber, final double value) - throws IOException { - writeTag(fieldNumber, WireFormat.WIRETYPE_FIXED64); - writeDoubleNoTag(value); + /** Encode and write a tag. */ + public void writeTag(final int fieldNumber, final int wireType) throws IOException { + writeRawVarint32(WireFormat.makeTag(fieldNumber, wireType)); } - /** Write a {@code float} field, including tag, to the stream. */ - public void writeFloat(final int fieldNumber, final float value) - throws IOException { - writeTag(fieldNumber, WireFormat.WIRETYPE_FIXED32); - writeFloatNoTag(value); + /** Write an {@code int32} field, including tag, to the stream. */ + public void writeInt32(final int fieldNumber, final int value) throws IOException { + writeTag(fieldNumber, WireFormat.WIRETYPE_VARINT); + writeInt32NoTag(value); } - /** Write a {@code uint64} field, including tag, to the stream. */ - public void writeUInt64(final int fieldNumber, final long value) - throws IOException { + /** Write a {@code uint32} field, including tag, to the stream. */ + public void writeUInt32(final int fieldNumber, final int value) throws IOException { writeTag(fieldNumber, WireFormat.WIRETYPE_VARINT); - writeUInt64NoTag(value); + writeUInt32NoTag(value); + } + + /** Write a {@code sint32} field, including tag, to the stream. 
*/ + public void writeSInt32(final int fieldNumber, final int value) throws IOException { + writeTag(fieldNumber, WireFormat.WIRETYPE_VARINT); + writeSInt32NoTag(value); + } + + /** Write a {@code fixed32} field, including tag, to the stream. */ + public void writeFixed32(final int fieldNumber, final int value) throws IOException { + writeTag(fieldNumber, WireFormat.WIRETYPE_FIXED32); + writeFixed32NoTag(value); + } + + /** Write an {@code sfixed32} field, including tag, to the stream. */ + public void writeSFixed32(final int fieldNumber, final int value) throws IOException { + writeTag(fieldNumber, WireFormat.WIRETYPE_FIXED32); + writeSFixed32NoTag(value); } /** Write an {@code int64} field, including tag, to the stream. */ - public void writeInt64(final int fieldNumber, final long value) - throws IOException { + public void writeInt64(final int fieldNumber, final long value) throws IOException { writeTag(fieldNumber, WireFormat.WIRETYPE_VARINT); writeInt64NoTag(value); } - /** Write an {@code int32} field, including tag, to the stream. */ - public void writeInt32(final int fieldNumber, final int value) - throws IOException { + /** Write a {@code uint64} field, including tag, to the stream. */ + public void writeUInt64(final int fieldNumber, final long value) throws IOException { writeTag(fieldNumber, WireFormat.WIRETYPE_VARINT); - writeInt32NoTag(value); + writeUInt64NoTag(value); + } + + /** Write an {@code sint64} field, including tag, to the stream. */ + public void writeSInt64(final int fieldNumber, final long value) throws IOException { + writeTag(fieldNumber, WireFormat.WIRETYPE_VARINT); + writeSInt64NoTag(value); } /** Write a {@code fixed64} field, including tag, to the stream. 
*/ - public void writeFixed64(final int fieldNumber, final long value) - throws IOException { + public void writeFixed64(final int fieldNumber, final long value) throws IOException { writeTag(fieldNumber, WireFormat.WIRETYPE_FIXED64); writeFixed64NoTag(value); } - /** Write a {@code fixed32} field, including tag, to the stream. */ - public void writeFixed32(final int fieldNumber, final int value) - throws IOException { + /** Write an {@code sfixed64} field, including tag, to the stream. */ + public void writeSFixed64(final int fieldNumber, final long value) throws IOException { + writeTag(fieldNumber, WireFormat.WIRETYPE_FIXED64); + writeSFixed64NoTag(value); + } + + /** Write a {@code float} field, including tag, to the stream. */ + public void writeFloat(final int fieldNumber, final float value) throws IOException { writeTag(fieldNumber, WireFormat.WIRETYPE_FIXED32); - writeFixed32NoTag(value); + writeFloatNoTag(value); + } + + /** Write a {@code double} field, including tag, to the stream. */ + public void writeDouble(final int fieldNumber, final double value) throws IOException { + writeTag(fieldNumber, WireFormat.WIRETYPE_FIXED64); + writeDoubleNoTag(value); } /** Write a {@code bool} field, including tag, to the stream. */ - public void writeBool(final int fieldNumber, final boolean value) - throws IOException { + public void writeBool(final int fieldNumber, final boolean value) throws IOException { writeTag(fieldNumber, WireFormat.WIRETYPE_VARINT); writeBoolNoTag(value); } - /** Write a {@code string} field, including tag, to the stream. */ - public void writeString(final int fieldNumber, final String value) - throws IOException { - writeTag(fieldNumber, WireFormat.WIRETYPE_LENGTH_DELIMITED); - writeStringNoTag(value); - } - - /** Write a {@code group} field, including tag, to the stream. 
*/ - public void writeGroup(final int fieldNumber, final MessageLite value) - throws IOException { - writeTag(fieldNumber, WireFormat.WIRETYPE_START_GROUP); - writeGroupNoTag(value); - writeTag(fieldNumber, WireFormat.WIRETYPE_END_GROUP); + /** + * Write an enum field, including tag, to the stream. The provided value is the numeric + * value used to represent the enum value on the wire (not the enum ordinal value). + */ + public void writeEnum(final int fieldNumber, final int value) throws IOException { + writeTag(fieldNumber, WireFormat.WIRETYPE_VARINT); + writeEnumNoTag(value); } - - /** Write an embedded message field, including tag, to the stream. */ - public void writeMessage(final int fieldNumber, final MessageLite value) - throws IOException { + /** Write a {@code string} field, including tag, to the stream. */ + public void writeString(final int fieldNumber, final String value) throws IOException { writeTag(fieldNumber, WireFormat.WIRETYPE_LENGTH_DELIMITED); - writeMessageNoTag(value); + writeStringNoTag(value); } - /** Write a {@code bytes} field, including tag, to the stream. */ - public void writeBytes(final int fieldNumber, final ByteString value) - throws IOException { + public void writeBytes(final int fieldNumber, final ByteString value) throws IOException { writeTag(fieldNumber, WireFormat.WIRETYPE_LENGTH_DELIMITED); writeBytesNoTag(value); } /** Write a {@code bytes} field, including tag, to the stream. */ - public void writeByteArray(final int fieldNumber, final byte[] value) - throws IOException { + public void writeByteArray(final int fieldNumber, final byte[] value) throws IOException { writeTag(fieldNumber, WireFormat.WIRETYPE_LENGTH_DELIMITED); writeByteArrayNoTag(value); } /** Write a {@code bytes} field, including tag, to the stream. 
*/ - public void writeByteArray(final int fieldNumber, - final byte[] value, - final int offset, - final int length) - throws IOException { + public void writeByteArray( + final int fieldNumber, final byte[] value, final int offset, final int length) + throws IOException { writeTag(fieldNumber, WireFormat.WIRETYPE_LENGTH_DELIMITED); writeByteArrayNoTag(value, offset, length); } @@ -285,64 +302,100 @@ public final class CodedOutputStream { * of a ByteBuffer, you can call * {@code writeByteBuffer(fieldNumber, byteBuffer.slice())}. */ - public void writeByteBuffer(final int fieldNumber, final ByteBuffer value) - throws IOException { + public void writeByteBuffer(final int fieldNumber, final ByteBuffer value) throws IOException { writeTag(fieldNumber, WireFormat.WIRETYPE_LENGTH_DELIMITED); writeByteBufferNoTag(value); } - /** Write a {@code uint32} field, including tag, to the stream. */ - public void writeUInt32(final int fieldNumber, final int value) - throws IOException { - writeTag(fieldNumber, WireFormat.WIRETYPE_VARINT); - writeUInt32NoTag(value); + /** Write a single byte. */ + public void writeRawByte(final byte value) throws IOException { + if (position == limit) { + refreshBuffer(); + } + + buffer[position++] = value; + ++totalBytesWritten; } - /** - * Write an enum field, including tag, to the stream. Caller is responsible - * for converting the enum value to its numeric value. - */ - public void writeEnum(final int fieldNumber, final int value) - throws IOException { - writeTag(fieldNumber, WireFormat.WIRETYPE_VARINT); - writeEnumNoTag(value); + /** Write a single byte, represented by an integer value. */ + public void writeRawByte(final int value) throws IOException { + writeRawByte((byte) value); } - /** Write an {@code sfixed32} field, including tag, to the stream. 
*/ - public void writeSFixed32(final int fieldNumber, final int value) - throws IOException { - writeTag(fieldNumber, WireFormat.WIRETYPE_FIXED32); - writeSFixed32NoTag(value); + /** Write an array of bytes. */ + public void writeRawBytes(final byte[] value) throws IOException { + writeRawBytes(value, 0, value.length); } - /** Write an {@code sfixed64} field, including tag, to the stream. */ - public void writeSFixed64(final int fieldNumber, final long value) - throws IOException { - writeTag(fieldNumber, WireFormat.WIRETYPE_FIXED64); - writeSFixed64NoTag(value); + /** Write part of an array of bytes. */ + public void writeRawBytes(final byte[] value, int offset, int length) throws IOException { + if (limit - position >= length) { + // We have room in the current buffer. + System.arraycopy(value, offset, buffer, position, length); + position += length; + totalBytesWritten += length; + } else { + // Write extends past current buffer. Fill the rest of this buffer and + // flush. + final int bytesWritten = limit - position; + System.arraycopy(value, offset, buffer, position, bytesWritten); + offset += bytesWritten; + length -= bytesWritten; + position = limit; + totalBytesWritten += bytesWritten; + refreshBuffer(); + + // Now deal with the rest. + // Since we have an output stream, this is our buffer + // and buffer offset == 0 + if (length <= limit) { + // Fits in new buffer. + System.arraycopy(value, offset, buffer, 0, length); + position = length; + } else { + // Write is very big. Let's do it all at once. + output.write(value, offset, length); + } + totalBytesWritten += length; + } } - /** Write an {@code sint32} field, including tag, to the stream. */ - public void writeSInt32(final int fieldNumber, final int value) - throws IOException { - writeTag(fieldNumber, WireFormat.WIRETYPE_VARINT); - writeSInt32NoTag(value); + /** Write a byte string. 
*/ + public void writeRawBytes(final ByteString value) throws IOException { + writeRawBytes(value, 0, value.size()); } - /** Write an {@code sint64} field, including tag, to the stream. */ - public void writeSInt64(final int fieldNumber, final long value) - throws IOException { - writeTag(fieldNumber, WireFormat.WIRETYPE_VARINT); - writeSInt64NoTag(value); + /** + * Write a ByteBuffer. This method will write all content of the ByteBuffer + * regardless of the current position and limit (i.e., the number of bytes + * to be written is value.capacity(), not value.remaining()). Furthermore, + * this method doesn't alter the state of the passed-in ByteBuffer. Its + * position, limit, mark, etc. will remain unchanged. If you only want to + * write the remaining bytes of a ByteBuffer, you can call + * {@code writeRawBytes(byteBuffer.slice())}. + */ + public void writeRawBytes(final ByteBuffer value) throws IOException { + if (value.hasArray()) { + writeRawBytes(value.array(), value.arrayOffset(), value.capacity()); + } else { + ByteBuffer duplicated = value.duplicate(); + duplicated.clear(); + writeRawBytesInternal(duplicated); + } + } + + /** Write an embedded message field, including tag, to the stream. */ + public void writeMessage(final int fieldNumber, final MessageLite value) throws IOException { + writeTag(fieldNumber, WireFormat.WIRETYPE_LENGTH_DELIMITED); + writeMessageNoTag(value); } /** * Write a MessageSet extension field to the stream. For historical reasons, * the wire format differs from normal fields. 
*/ - public void writeMessageSetExtension(final int fieldNumber, - final MessageLite value) - throws IOException { + public void writeMessageSetExtension(final int fieldNumber, final MessageLite value) + throws IOException { writeTag(WireFormat.MESSAGE_SET_ITEM, WireFormat.WIRETYPE_START_GROUP); writeUInt32(WireFormat.MESSAGE_SET_TYPE_ID, fieldNumber); writeMessage(WireFormat.MESSAGE_SET_MESSAGE, value); @@ -353,9 +406,8 @@ public final class CodedOutputStream { * Write an unparsed MessageSet extension field to the stream. For * historical reasons, the wire format differs from normal fields. */ - public void writeRawMessageSetExtension(final int fieldNumber, - final ByteString value) - throws IOException { + public void writeRawMessageSetExtension(final int fieldNumber, final ByteString value) + throws IOException { writeTag(WireFormat.MESSAGE_SET_ITEM, WireFormat.WIRETYPE_START_GROUP); writeUInt32(WireFormat.MESSAGE_SET_TYPE_ID, fieldNumber); writeBytes(WireFormat.MESSAGE_SET_MESSAGE, value); @@ -364,19 +416,34 @@ public final class CodedOutputStream { // ----------------------------------------------------------------- - /** Write a {@code double} field to the stream. */ - public void writeDoubleNoTag(final double value) throws IOException { - writeRawLittleEndian64(Double.doubleToRawLongBits(value)); + /** Write an {@code int32} field to the stream. */ + public void writeInt32NoTag(final int value) throws IOException { + if (value >= 0) { + writeRawVarint32(value); + } else { + // Must sign-extend. + writeRawVarint64(value); + } } - /** Write a {@code float} field to the stream. */ - public void writeFloatNoTag(final float value) throws IOException { - writeRawLittleEndian32(Float.floatToRawIntBits(value)); + /** Write a {@code uint32} field to the stream. */ + public void writeUInt32NoTag(final int value) throws IOException { + writeRawVarint32(value); } - /** Write a {@code uint64} field to the stream. 
*/ - public void writeUInt64NoTag(final long value) throws IOException { - writeRawVarint64(value); + /** Write a {@code sint32} field to the stream. */ + public void writeSInt32NoTag(final int value) throws IOException { + writeRawVarint32(encodeZigZag32(value)); + } + + /** Write a {@code fixed32} field to the stream. */ + public void writeFixed32NoTag(final int value) throws IOException { + writeRawLittleEndian32(value); + } + + /** Write a {@code sfixed32} field to the stream. */ + public void writeSFixed32NoTag(final int value) throws IOException { + writeRawLittleEndian32(value); } /** Write an {@code int64} field to the stream. */ @@ -384,14 +451,14 @@ public final class CodedOutputStream { writeRawVarint64(value); } - /** Write an {@code int32} field to the stream. */ - public void writeInt32NoTag(final int value) throws IOException { - if (value >= 0) { - writeRawVarint32(value); - } else { - // Must sign-extend. - writeRawVarint64(value); - } + /** Write a {@code uint64} field to the stream. */ + public void writeUInt64NoTag(final long value) throws IOException { + writeRawVarint64(value); + } + + /** Write a {@code sint64} field to the stream. */ + public void writeSInt64NoTag(final long value) throws IOException { + writeRawVarint64(encodeZigZag64(value)); } /** Write a {@code fixed64} field to the stream. */ @@ -399,9 +466,19 @@ public final class CodedOutputStream { writeRawLittleEndian64(value); } - /** Write a {@code fixed32} field to the stream. */ - public void writeFixed32NoTag(final int value) throws IOException { - writeRawLittleEndian32(value); + /** Write a {@code sfixed64} field to the stream. */ + public void writeSFixed64NoTag(final long value) throws IOException { + writeRawLittleEndian64(value); + } + + /** Write a {@code float} field to the stream. */ + public void writeFloatNoTag(final float value) throws IOException { + writeRawLittleEndian32(Float.floatToRawIntBits(value)); + } + + /** Write a {@code double} field to the stream. 
*/ + public void writeDoubleNoTag(final double value) throws IOException { + writeRawLittleEndian64(Double.doubleToRawLongBits(value)); } /** Write a {@code bool} field to the stream. */ @@ -409,6 +486,14 @@ public final class CodedOutputStream { writeRawByte(value ? 1 : 0); } + /** + * Write an enum field to the stream. The provided value is the numeric + * value used to represent the enum value on the wire (not the enum ordinal value). + */ + public void writeEnumNoTag(final int value) throws IOException { + writeInt32NoTag(value); + } + /** Write a {@code string} field to the stream. */ // TODO(dweis): Document behavior on ill-formed UTF-16 input. public void writeStringNoTag(final String value) throws IOException { @@ -421,89 +506,6 @@ public final class CodedOutputStream { } } - /** Write a {@code string} field to the stream. */ - private void inefficientWriteStringNoTag(final String value) throws IOException { - // Unfortunately there does not appear to be any way to tell Java to encode - // UTF-8 directly into our buffer, so we have to let it create its own byte - // array and then copy. - // TODO(dweis): Consider using nio Charset methods instead. - final byte[] bytes = value.getBytes(Internal.UTF_8); - writeRawVarint32(bytes.length); - writeRawBytes(bytes); - } - - /** - * Write a {@code string} field to the stream efficiently. If the {@code string} is malformed, - * this method rolls back its changes and throws an {@link UnpairedSurrogateException} with the - * intent that the caller will catch and retry with {@link #inefficientWriteStringNoTag(String)}. - * - * @param value the string to write to the stream - * - * @throws UnpairedSurrogateException when {@code value} is ill-formed UTF-16. - */ - private void efficientWriteStringNoTag(final String value) throws IOException { - // UTF-8 byte length of the string is at least its UTF-16 code unit length (value.length()), - // and at most 3 times of it. We take advantage of this in both branches below. 
- final int maxLength = value.length() * Utf8.MAX_BYTES_PER_CHAR; - final int maxLengthVarIntSize = computeRawVarint32Size(maxLength); - - // If we are streaming and the potential length is too big to fit in our buffer, we take the - // slower path. Otherwise, we're good to try the fast path. - if (output != null && maxLengthVarIntSize + maxLength > limit - position) { - // Allocate a byte[] that we know can fit the string and encode into it. String.getBytes() - // does the same internally and then does *another copy* to return a byte[] of exactly the - // right size. We can skip that copy and just writeRawBytes up to the actualLength of the - // UTF-8 encoded bytes. - final byte[] encodedBytes = new byte[maxLength]; - int actualLength = Utf8.encode(value, encodedBytes, 0, maxLength); - writeRawVarint32(actualLength); - writeRawBytes(encodedBytes, 0, actualLength); - } else { - // Optimize for the case where we know this length results in a constant varint length as this - // saves a pass for measuring the length of the string. - final int minLengthVarIntSize = computeRawVarint32Size(value.length()); - int oldPosition = position; - final int length; - try { - if (minLengthVarIntSize == maxLengthVarIntSize) { - position = oldPosition + minLengthVarIntSize; - int newPosition = Utf8.encode(value, buffer, position, limit - position); - // Since this class is stateful and tracks the position, we rewind and store the state, - // prepend the length, then reset it back to the end of the string. - position = oldPosition; - length = newPosition - oldPosition - minLengthVarIntSize; - writeRawVarint32(length); - position = newPosition; - } else { - length = Utf8.encodedLength(value); - writeRawVarint32(length); - position = Utf8.encode(value, buffer, position, limit - position); - } - } catch (UnpairedSurrogateException e) { - // Be extra careful and restore the original position for retrying the write with the less - // efficient path. 
- position = oldPosition; - throw e; - } catch (ArrayIndexOutOfBoundsException e) { - throw new OutOfSpaceException(e); - } - totalBytesWritten += length; - } - } - - /** Write a {@code group} field to the stream. */ - public void writeGroupNoTag(final MessageLite value) throws IOException { - value.writeTo(this); - } - - - /** Write an embedded message field to the stream. */ - public void writeMessageNoTag(final MessageLite value) throws IOException { - writeRawVarint32(value.getSerializedSize()); - value.writeTo(this); - } - - /** Write a {@code bytes} field to the stream. */ public void writeBytesNoTag(final ByteString value) throws IOException { writeRawVarint32(value.size()); @@ -516,86 +518,53 @@ public final class CodedOutputStream { writeRawBytes(value); } - /** Write a {@code bytes} field to the stream. */ - public void writeByteArrayNoTag(final byte[] value, - final int offset, - final int length) throws IOException { - writeRawVarint32(length); - writeRawBytes(value, offset, length); + /** Write an embedded message field to the stream. */ + public void writeMessageNoTag(final MessageLite value) throws IOException { + writeRawVarint32(value.getSerializedSize()); + value.writeTo(this); } + // ================================================================= + // ================================================================= + /** - * Write a {@code bytes} field to the stream. This method will write all - * content of the ByteBuffer regardless of the current position and limit - * (i.e., the number of bytes to be written is value.capacity(), not - * value.remaining()). Furthermore, this method doesn't alter the state of - * the passed-in ByteBuffer. Its position, limit, mark, etc. will remain - * unchanged. If you only want to write the remaining bytes of a ByteBuffer, - * you can call {@code writeByteBufferNoTag(byteBuffer.slice())}. + * Compute the number of bytes that would be needed to encode an + * {@code int32} field, including tag. 
*/ - public void writeByteBufferNoTag(final ByteBuffer value) throws IOException { - writeRawVarint32(value.capacity()); - writeRawBytes(value); - } - - /** Write a {@code uint32} field to the stream. */ - public void writeUInt32NoTag(final int value) throws IOException { - writeRawVarint32(value); + public static int computeInt32Size(final int fieldNumber, final int value) { + return computeTagSize(fieldNumber) + computeInt32SizeNoTag(value); } /** - * Write an enum field to the stream. Caller is responsible - * for converting the enum value to its numeric value. + * Compute the number of bytes that would be needed to encode a + * {@code uint32} field, including tag. */ - public void writeEnumNoTag(final int value) throws IOException { - writeInt32NoTag(value); - } - - /** Write an {@code sfixed32} field to the stream. */ - public void writeSFixed32NoTag(final int value) throws IOException { - writeRawLittleEndian32(value); - } - - /** Write an {@code sfixed64} field to the stream. */ - public void writeSFixed64NoTag(final long value) throws IOException { - writeRawLittleEndian64(value); - } - - /** Write an {@code sint32} field to the stream. */ - public void writeSInt32NoTag(final int value) throws IOException { - writeRawVarint32(encodeZigZag32(value)); - } - - /** Write an {@code sint64} field to the stream. */ - public void writeSInt64NoTag(final long value) throws IOException { - writeRawVarint64(encodeZigZag64(value)); + public static int computeUInt32Size(final int fieldNumber, final int value) { + return computeTagSize(fieldNumber) + computeUInt32SizeNoTag(value); } - // ================================================================= - /** - * Compute the number of bytes that would be needed to encode a - * {@code double} field, including tag. + * Compute the number of bytes that would be needed to encode an + * {@code sint32} field, including tag. 
*/ - public static int computeDoubleSize(final int fieldNumber, - final double value) { - return computeTagSize(fieldNumber) + computeDoubleSizeNoTag(value); + public static int computeSInt32Size(final int fieldNumber, final int value) { + return computeTagSize(fieldNumber) + computeSInt32SizeNoTag(value); } /** * Compute the number of bytes that would be needed to encode a - * {@code float} field, including tag. + * {@code fixed32} field, including tag. */ - public static int computeFloatSize(final int fieldNumber, final float value) { - return computeTagSize(fieldNumber) + computeFloatSizeNoTag(value); + public static int computeFixed32Size(final int fieldNumber, final int value) { + return computeTagSize(fieldNumber) + computeFixed32SizeNoTag(value); } /** - * Compute the number of bytes that would be needed to encode a - * {@code uint64} field, including tag. + * Compute the number of bytes that would be needed to encode an + * {@code sfixed32} field, including tag. */ - public static int computeUInt64Size(final int fieldNumber, final long value) { - return computeTagSize(fieldNumber) + computeUInt64SizeNoTag(value); + public static int computeSFixed32Size(final int fieldNumber, final int value) { + return computeTagSize(fieldNumber) + computeSFixed32SizeNoTag(value); } /** @@ -607,73 +576,83 @@ public final class CodedOutputStream { } /** + * Compute the number of bytes that would be needed to encode a + * {@code uint64} field, including tag. + */ + public static int computeUInt64Size(final int fieldNumber, final long value) { + return computeTagSize(fieldNumber) + computeUInt64SizeNoTag(value); + } + + /** * Compute the number of bytes that would be needed to encode an - * {@code int32} field, including tag. + * {@code sint64} field, including tag. 
*/ - public static int computeInt32Size(final int fieldNumber, final int value) { - return computeTagSize(fieldNumber) + computeInt32SizeNoTag(value); + public static int computeSInt64Size(final int fieldNumber, final long value) { + return computeTagSize(fieldNumber) + computeSInt64SizeNoTag(value); } /** * Compute the number of bytes that would be needed to encode a * {@code fixed64} field, including tag. */ - public static int computeFixed64Size(final int fieldNumber, - final long value) { + public static int computeFixed64Size(final int fieldNumber, final long value) { return computeTagSize(fieldNumber) + computeFixed64SizeNoTag(value); } /** - * Compute the number of bytes that would be needed to encode a - * {@code fixed32} field, including tag. + * Compute the number of bytes that would be needed to encode an + * {@code sfixed64} field, including tag. */ - public static int computeFixed32Size(final int fieldNumber, - final int value) { - return computeTagSize(fieldNumber) + computeFixed32SizeNoTag(value); + public static int computeSFixed64Size(final int fieldNumber, final long value) { + return computeTagSize(fieldNumber) + computeSFixed64SizeNoTag(value); } /** * Compute the number of bytes that would be needed to encode a - * {@code bool} field, including tag. + * {@code float} field, including tag. */ - public static int computeBoolSize(final int fieldNumber, - final boolean value) { - return computeTagSize(fieldNumber) + computeBoolSizeNoTag(value); + public static int computeFloatSize(final int fieldNumber, final float value) { + return computeTagSize(fieldNumber) + computeFloatSizeNoTag(value); } /** * Compute the number of bytes that would be needed to encode a - * {@code string} field, including tag. + * {@code double} field, including tag. 
*/ - public static int computeStringSize(final int fieldNumber, - final String value) { - return computeTagSize(fieldNumber) + computeStringSizeNoTag(value); + public static int computeDoubleSize(final int fieldNumber, final double value) { + return computeTagSize(fieldNumber) + computeDoubleSizeNoTag(value); } /** * Compute the number of bytes that would be needed to encode a - * {@code group} field, including tag. + * {@code bool} field, including tag. */ - public static int computeGroupSize(final int fieldNumber, - final MessageLite value) { - return computeTagSize(fieldNumber) * 2 + computeGroupSizeNoTag(value); + public static int computeBoolSize(final int fieldNumber, final boolean value) { + return computeTagSize(fieldNumber) + computeBoolSizeNoTag(value); } /** * Compute the number of bytes that would be needed to encode an - * embedded message field, including tag. + * enum field, including tag. The provided value is the numeric + * value used to represent the enum value on the wire (not the enum ordinal value). */ - public static int computeMessageSize(final int fieldNumber, - final MessageLite value) { - return computeTagSize(fieldNumber) + computeMessageSizeNoTag(value); + public static int computeEnumSize(final int fieldNumber, final int value) { + return computeTagSize(fieldNumber) + computeEnumSizeNoTag(value); + } + + /** + * Compute the number of bytes that would be needed to encode a + * {@code string} field, including tag. + */ + public static int computeStringSize(final int fieldNumber, final String value) { + return computeTagSize(fieldNumber) + computeStringSizeNoTag(value); } /** * Compute the number of bytes that would be needed to encode a * {@code bytes} field, including tag. 
*/ - public static int computeBytesSize(final int fieldNumber, - final ByteString value) { + public static int computeBytesSize(final int fieldNumber, final ByteString value) { return computeTagSize(fieldNumber) + computeBytesSizeNoTag(value); } @@ -681,8 +660,7 @@ public final class CodedOutputStream { * Compute the number of bytes that would be needed to encode a * {@code bytes} field, including tag. */ - public static int computeByteArraySize(final int fieldNumber, - final byte[] value) { + public static int computeByteArraySize(final int fieldNumber, final byte[] value) { return computeTagSize(fieldNumber) + computeByteArraySizeNoTag(value); } @@ -690,8 +668,7 @@ public final class CodedOutputStream { * Compute the number of bytes that would be needed to encode a * {@code bytes} field, including tag. */ - public static int computeByteBufferSize(final int fieldNumber, - final ByteBuffer value) { + public static int computeByteBufferSize(final int fieldNumber, final ByteBuffer value) { return computeTagSize(fieldNumber) + computeByteBufferSizeNoTag(value); } @@ -699,114 +676,111 @@ public final class CodedOutputStream { * Compute the number of bytes that would be needed to encode an * embedded message in lazy field, including tag. */ - public static int computeLazyFieldSize(final int fieldNumber, - final LazyFieldLite value) { + public static int computeLazyFieldSize(final int fieldNumber, final LazyFieldLite value) { return computeTagSize(fieldNumber) + computeLazyFieldSizeNoTag(value); } /** - * Compute the number of bytes that would be needed to encode a - * {@code uint32} field, including tag. + * Compute the number of bytes that would be needed to encode an + * embedded message field, including tag. 
*/ - public static int computeUInt32Size(final int fieldNumber, final int value) { - return computeTagSize(fieldNumber) + computeUInt32SizeNoTag(value); + public static int computeMessageSize(final int fieldNumber, final MessageLite value) { + return computeTagSize(fieldNumber) + computeMessageSizeNoTag(value); } /** - * Compute the number of bytes that would be needed to encode an - * enum field, including tag. Caller is responsible for converting the - * enum value to its numeric value. + * Compute the number of bytes that would be needed to encode a + * MessageSet extension to the stream. For historical reasons, + * the wire format differs from normal fields. */ - public static int computeEnumSize(final int fieldNumber, final int value) { - return computeTagSize(fieldNumber) + computeEnumSizeNoTag(value); + public static int computeMessageSetExtensionSize(final int fieldNumber, final MessageLite value) { + return computeTagSize(WireFormat.MESSAGE_SET_ITEM) * 2 + + computeUInt32Size(WireFormat.MESSAGE_SET_TYPE_ID, fieldNumber) + + computeMessageSize(WireFormat.MESSAGE_SET_MESSAGE, value); } /** * Compute the number of bytes that would be needed to encode an - * {@code sfixed32} field, including tag. + * unparsed MessageSet extension field to the stream. For + * historical reasons, the wire format differs from normal fields. */ - public static int computeSFixed32Size(final int fieldNumber, - final int value) { - return computeTagSize(fieldNumber) + computeSFixed32SizeNoTag(value); + public static int computeRawMessageSetExtensionSize( + final int fieldNumber, final ByteString value) { + return computeTagSize(WireFormat.MESSAGE_SET_ITEM) * 2 + + computeUInt32Size(WireFormat.MESSAGE_SET_TYPE_ID, fieldNumber) + + computeBytesSize(WireFormat.MESSAGE_SET_MESSAGE, value); } /** * Compute the number of bytes that would be needed to encode an - * {@code sfixed64} field, including tag. + * lazily parsed MessageSet extension field to the stream. 
For + * historical reasons, the wire format differs from normal fields. */ - public static int computeSFixed64Size(final int fieldNumber, - final long value) { - return computeTagSize(fieldNumber) + computeSFixed64SizeNoTag(value); + public static int computeLazyFieldMessageSetExtensionSize( + final int fieldNumber, final LazyFieldLite value) { + return computeTagSize(WireFormat.MESSAGE_SET_ITEM) * 2 + + computeUInt32Size(WireFormat.MESSAGE_SET_TYPE_ID, fieldNumber) + + computeLazyFieldSize(WireFormat.MESSAGE_SET_MESSAGE, value); } - /** - * Compute the number of bytes that would be needed to encode an - * {@code sint32} field, including tag. - */ - public static int computeSInt32Size(final int fieldNumber, final int value) { - return computeTagSize(fieldNumber) + computeSInt32SizeNoTag(value); + // ----------------------------------------------------------------- + + /** Compute the number of bytes that would be needed to encode a tag. */ + public static int computeTagSize(final int fieldNumber) { + return computeRawVarint32Size(WireFormat.makeTag(fieldNumber, 0)); } /** * Compute the number of bytes that would be needed to encode an - * {@code sint64} field, including tag. + * {@code int32} field, including tag. */ - public static int computeSInt64Size(final int fieldNumber, final long value) { - return computeTagSize(fieldNumber) + computeSInt64SizeNoTag(value); + public static int computeInt32SizeNoTag(final int value) { + if (value >= 0) { + return computeRawVarint32Size(value); + } else { + // Must sign-extend. + return 10; + } } /** * Compute the number of bytes that would be needed to encode a - * MessageSet extension to the stream. For historical reasons, - * the wire format differs from normal fields. + * {@code uint32} field. 
*/ - public static int computeMessageSetExtensionSize( - final int fieldNumber, final MessageLite value) { - return computeTagSize(WireFormat.MESSAGE_SET_ITEM) * 2 + - computeUInt32Size(WireFormat.MESSAGE_SET_TYPE_ID, fieldNumber) + - computeMessageSize(WireFormat.MESSAGE_SET_MESSAGE, value); + public static int computeUInt32SizeNoTag(final int value) { + return computeRawVarint32Size(value); } /** * Compute the number of bytes that would be needed to encode an - * unparsed MessageSet extension field to the stream. For - * historical reasons, the wire format differs from normal fields. + * {@code sint32} field. */ - public static int computeRawMessageSetExtensionSize( - final int fieldNumber, final ByteString value) { - return computeTagSize(WireFormat.MESSAGE_SET_ITEM) * 2 + - computeUInt32Size(WireFormat.MESSAGE_SET_TYPE_ID, fieldNumber) + - computeBytesSize(WireFormat.MESSAGE_SET_MESSAGE, value); + public static int computeSInt32SizeNoTag(final int value) { + return computeRawVarint32Size(encodeZigZag32(value)); } /** - * Compute the number of bytes that would be needed to encode an - * lazily parsed MessageSet extension field to the stream. For - * historical reasons, the wire format differs from normal fields. + * Compute the number of bytes that would be needed to encode a + * {@code fixed32} field. */ - public static int computeLazyFieldMessageSetExtensionSize( - final int fieldNumber, final LazyFieldLite value) { - return computeTagSize(WireFormat.MESSAGE_SET_ITEM) * 2 + - computeUInt32Size(WireFormat.MESSAGE_SET_TYPE_ID, fieldNumber) + - computeLazyFieldSize(WireFormat.MESSAGE_SET_MESSAGE, value); + public static int computeFixed32SizeNoTag(@SuppressWarnings("unused") final int unused) { + return LITTLE_ENDIAN_32_SIZE; } - // ----------------------------------------------------------------- - /** - * Compute the number of bytes that would be needed to encode a - * {@code double} field, including tag. 
+ * Compute the number of bytes that would be needed to encode an + * {@code sfixed32} field. */ - public static int computeDoubleSizeNoTag(final double value) { - return LITTLE_ENDIAN_64_SIZE; + public static int computeSFixed32SizeNoTag(@SuppressWarnings("unused") final int unused) { + return LITTLE_ENDIAN_32_SIZE; } /** - * Compute the number of bytes that would be needed to encode a - * {@code float} field, including tag. + * Compute the number of bytes that would be needed to encode an + * {@code int64} field, including tag. */ - public static int computeFloatSizeNoTag(final float value) { - return LITTLE_ENDIAN_32_SIZE; + public static int computeInt64SizeNoTag(final long value) { + return computeRawVarint64Size(value); } /** @@ -819,50 +793,62 @@ public final class CodedOutputStream { /** * Compute the number of bytes that would be needed to encode an - * {@code int64} field, including tag. + * {@code sint64} field. */ - public static int computeInt64SizeNoTag(final long value) { - return computeRawVarint64Size(value); + public static int computeSInt64SizeNoTag(final long value) { + return computeRawVarint64Size(encodeZigZag64(value)); } /** - * Compute the number of bytes that would be needed to encode an - * {@code int32} field, including tag. + * Compute the number of bytes that would be needed to encode a + * {@code fixed64} field. */ - public static int computeInt32SizeNoTag(final int value) { - if (value >= 0) { - return computeRawVarint32Size(value); - } else { - // Must sign-extend. - return 10; - } + public static int computeFixed64SizeNoTag(@SuppressWarnings("unused") final long unused) { + return LITTLE_ENDIAN_64_SIZE; } /** - * Compute the number of bytes that would be needed to encode a - * {@code fixed64} field. + * Compute the number of bytes that would be needed to encode an + * {@code sfixed64} field. 
*/ - public static int computeFixed64SizeNoTag(final long value) { + public static int computeSFixed64SizeNoTag(@SuppressWarnings("unused") final long unused) { return LITTLE_ENDIAN_64_SIZE; } /** * Compute the number of bytes that would be needed to encode a - * {@code fixed32} field. + * {@code float} field, including tag. */ - public static int computeFixed32SizeNoTag(final int value) { + public static int computeFloatSizeNoTag(@SuppressWarnings("unused") final float unused) { return LITTLE_ENDIAN_32_SIZE; } /** * Compute the number of bytes that would be needed to encode a + * {@code double} field, including tag. + */ + public static int computeDoubleSizeNoTag(@SuppressWarnings("unused") final double unused) { + return LITTLE_ENDIAN_64_SIZE; + } + + /** + * Compute the number of bytes that would be needed to encode a * {@code bool} field. */ - public static int computeBoolSizeNoTag(final boolean value) { + public static int computeBoolSizeNoTag(@SuppressWarnings("unused") final boolean unused) { return 1; } /** + * Compute the number of bytes that would be needed to encode an enum field. + * The provided value is the numeric value used to represent the enum value on the wire + * (not the enum ordinal value). + */ + public static int computeEnumSizeNoTag(final int value) { + return computeInt32SizeNoTag(value); + } + + /** * Compute the number of bytes that would be needed to encode a * {@code string} field. */ @@ -880,23 +866,6 @@ public final class CodedOutputStream { } /** - * Compute the number of bytes that would be needed to encode a - * {@code group} field. - */ - public static int computeGroupSizeNoTag(final MessageLite value) { - return value.getSerializedSize(); - } - - /** - * Compute the number of bytes that would be needed to encode an embedded - * message field. 
- */ - public static int computeMessageSizeNoTag(final MessageLite value) { - final int size = value.getSerializedSize(); - return computeRawVarint32Size(size) + size; - } - - /** * Compute the number of bytes that would be needed to encode an embedded * message stored in lazy field. */ @@ -910,8 +879,7 @@ public final class CodedOutputStream { * {@code bytes} field. */ public static int computeBytesSizeNoTag(final ByteString value) { - return computeRawVarint32Size(value.size()) + - value.size(); + return computeRawVarint32Size(value.size()) + value.size(); } /** @@ -931,72 +899,47 @@ public final class CodedOutputStream { } /** - * Compute the number of bytes that would be needed to encode a - * {@code uint32} field. - */ - public static int computeUInt32SizeNoTag(final int value) { - return computeRawVarint32Size(value); - } - - /** - * Compute the number of bytes that would be needed to encode an enum field. - * Caller is responsible for converting the enum value to its numeric value. - */ - public static int computeEnumSizeNoTag(final int value) { - return computeInt32SizeNoTag(value); - } - - /** - * Compute the number of bytes that would be needed to encode an - * {@code sfixed32} field. - */ - public static int computeSFixed32SizeNoTag(final int value) { - return LITTLE_ENDIAN_32_SIZE; - } - - /** - * Compute the number of bytes that would be needed to encode an - * {@code sfixed64} field. + * Compute the number of bytes that would be needed to encode an embedded + * message field. */ - public static int computeSFixed64SizeNoTag(final long value) { - return LITTLE_ENDIAN_64_SIZE; + public static int computeMessageSizeNoTag(final MessageLite value) { + final int size = value.getSerializedSize(); + return computeRawVarint32Size(size) + size; } /** - * Compute the number of bytes that would be needed to encode an - * {@code sint32} field. + * Encode a ZigZag-encoded 32-bit value. 
ZigZag encodes signed integers + * into values that can be efficiently encoded with varint. (Otherwise, + * negative values must be sign-extended to 64 bits to be varint encoded, + * thus always taking 10 bytes on the wire.) + * + * @param n A signed 32-bit integer. + * @return An unsigned 32-bit integer, stored in a signed int because + * Java has no explicit unsigned support. */ - public static int computeSInt32SizeNoTag(final int value) { - return computeRawVarint32Size(encodeZigZag32(value)); + public static int encodeZigZag32(final int n) { + // Note: the right-shift must be arithmetic + return (n << 1) ^ (n >> 31); } /** - * Compute the number of bytes that would be needed to encode an - * {@code sint64} field. + * Encode a ZigZag-encoded 64-bit value. ZigZag encodes signed integers + * into values that can be efficiently encoded with varint. (Otherwise, + * negative values must be sign-extended to 64 bits to be varint encoded, + * thus always taking 10 bytes on the wire.) + * + * @param n A signed 64-bit integer. + * @return An unsigned 64-bit integer, stored in a signed int because + * Java has no explicit unsigned support. */ - public static int computeSInt64SizeNoTag(final long value) { - return computeRawVarint64Size(encodeZigZag64(value)); + public static long encodeZigZag64(final long n) { + // Note: the right-shift must be arithmetic + return (n << 1) ^ (n >> 63); } // ================================================================= /** - * Internal helper that writes the current buffer to the output. The - * buffer position is reset to its initial value when this returns. - */ - private void refreshBuffer() throws IOException { - if (output == null) { - // We're writing to a single buffer. - throw new OutOfSpaceException(); - } - - // Since we have an output stream, this is our buffer - // and buffer offset == 0 - output.write(buffer, 0, position); - position = 0; - } - - /** * Flushes the stream and forces any buffered bytes to be written. 
This * does not flush the underlying OutputStream. */ @@ -1015,8 +958,8 @@ public final class CodedOutputStream { return limit - position; } else { throw new UnsupportedOperationException( - "spaceLeft() can only be called on CodedOutputStreams that are " + - "writing to a flat array."); + "spaceLeft() can only be called on CodedOutputStreams that are " + + "writing to a flat array."); } } @@ -1029,8 +972,7 @@ public final class CodedOutputStream { */ public void checkNoSpaceLeft() { if (spaceLeft() != 0) { - throw new IllegalStateException( - "Did not write as much data as expected."); + throw new IllegalStateException("Did not write as much data as expected."); } } @@ -1063,53 +1005,96 @@ public final class CodedOutputStream { return totalBytesWritten; } - /** Write a single byte. */ - public void writeRawByte(final byte value) throws IOException { - if (position == limit) { - refreshBuffer(); - } - - buffer[position++] = value; - ++totalBytesWritten; - } + // ================================================================= - /** Write a single byte, represented by an integer value. */ - public void writeRawByte(final int value) throws IOException { - writeRawByte((byte) value); - } + /** + * Internal helper that writes the current buffer to the output. The + * buffer position is reset to its initial value when this returns. + */ + private void refreshBuffer() throws IOException { + if (output == null) { + // We're writing to a single buffer. + throw new OutOfSpaceException(); + } - /** Write a byte string. */ - public void writeRawBytes(final ByteString value) throws IOException { - writeRawBytes(value, 0, value.size()); + // Since we have an output stream, this is our buffer + // and buffer offset == 0 + output.write(buffer, 0, position); + position = 0; } - /** Write an array of bytes. */ - public void writeRawBytes(final byte[] value) throws IOException { - writeRawBytes(value, 0, value.length); + /** Write a {@code string} field to the stream. 
*/ + private void inefficientWriteStringNoTag(final String value) throws IOException { + // Unfortunately there does not appear to be any way to tell Java to encode + // UTF-8 directly into our buffer, so we have to let it create its own byte + // array and then copy. + // TODO(dweis): Consider using nio Charset methods instead. + final byte[] bytes = value.getBytes(Internal.UTF_8); + writeRawVarint32(bytes.length); + writeRawBytes(bytes); } /** - * Write a ByteBuffer. This method will write all content of the ByteBuffer - * regardless of the current position and limit (i.e., the number of bytes - * to be written is value.capacity(), not value.remaining()). Furthermore, - * this method doesn't alter the state of the passed-in ByteBuffer. Its - * position, limit, mark, etc. will remain unchanged. If you only want to - * write the remaining bytes of a ByteBuffer, you can call - * {@code writeRawBytes(byteBuffer.slice())}. + * Write a {@code string} field to the stream efficiently. If the {@code string} is malformed, + * this method rolls back its changes and throws an {@link UnpairedSurrogateException} with the + * intent that the caller will catch and retry with {@link #inefficientWriteStringNoTag(String)}. + * + * @param value the string to write to the stream + * + * @throws UnpairedSurrogateException when {@code value} is ill-formed UTF-16. */ - public void writeRawBytes(final ByteBuffer value) throws IOException { - if (value.hasArray()) { - writeRawBytes(value.array(), value.arrayOffset(), value.capacity()); + private void efficientWriteStringNoTag(final String value) throws IOException { + // UTF-8 byte length of the string is at least its UTF-16 code unit length (value.length()), + // and at most 3 times of it. We take advantage of this in both branches below. 
+ final int maxLength = value.length() * Utf8.MAX_BYTES_PER_CHAR; + final int maxLengthVarIntSize = computeRawVarint32Size(maxLength); + + // If we are streaming and the potential length is too big to fit in our buffer, we take the + // slower path. Otherwise, we're good to try the fast path. + if (output != null && maxLengthVarIntSize + maxLength > limit - position) { + // Allocate a byte[] that we know can fit the string and encode into it. String.getBytes() + // does the same internally and then does *another copy* to return a byte[] of exactly the + // right size. We can skip that copy and just writeRawBytes up to the actualLength of the + // UTF-8 encoded bytes. + final byte[] encodedBytes = new byte[maxLength]; + int actualLength = Utf8.encode(value, encodedBytes, 0, maxLength); + writeRawVarint32(actualLength); + writeRawBytes(encodedBytes, 0, actualLength); } else { - ByteBuffer duplicated = value.duplicate(); - duplicated.clear(); - writeRawBytesInternal(duplicated); + // Optimize for the case where we know this length results in a constant varint length as this + // saves a pass for measuring the length of the string. + final int minLengthVarIntSize = computeRawVarint32Size(value.length()); + int oldPosition = position; + final int length; + try { + if (minLengthVarIntSize == maxLengthVarIntSize) { + position = oldPosition + minLengthVarIntSize; + int newPosition = Utf8.encode(value, buffer, position, limit - position); + // Since this class is stateful and tracks the position, we rewind and store the state, + // prepend the length, then reset it back to the end of the string. 
+ position = oldPosition; + length = newPosition - oldPosition - minLengthVarIntSize; + writeRawVarint32(length); + position = newPosition; + } else { + length = Utf8.encodedLength(value); + writeRawVarint32(length); + position = Utf8.encode(value, buffer, position, limit - position); + } + } catch (UnpairedSurrogateException e) { + // Be extra careful and restore the original position for retrying the write with the less + // efficient path. + position = oldPosition; + throw e; + } catch (ArrayIndexOutOfBoundsException e) { + throw new OutOfSpaceException(e); + } + totalBytesWritten += length; } } /** Write a ByteBuffer that isn't backed by an array. */ - private void writeRawBytesInternal(final ByteBuffer value) - throws IOException { + private void writeRawBytesInternal(final ByteBuffer value) throws IOException { int length = value.remaining(); if (limit - position >= length) { // We have room in the current buffer. @@ -1143,43 +1128,29 @@ public final class CodedOutputStream { } } - /** Write part of an array of bytes. */ - public void writeRawBytes(final byte[] value, int offset, int length) - throws IOException { - if (limit - position >= length) { - // We have room in the current buffer. - System.arraycopy(value, offset, buffer, position, length); - position += length; - totalBytesWritten += length; - } else { - // Write extends past current buffer. Fill the rest of this buffer and - // flush. - final int bytesWritten = limit - position; - System.arraycopy(value, offset, buffer, position, bytesWritten); - offset += bytesWritten; - length -= bytesWritten; - position = limit; - totalBytesWritten += bytesWritten; - refreshBuffer(); + /** Write a {@code bytes} field to the stream. Visible for testing. */ + void writeByteArrayNoTag(final byte[] value, final int offset, final int length) + throws IOException { + writeRawVarint32(length); + writeRawBytes(value, offset, length); + } - // Now deal with the rest. 
- // Since we have an output stream, this is our buffer - // and buffer offset == 0 - if (length <= limit) { - // Fits in new buffer. - System.arraycopy(value, offset, buffer, 0, length); - position = length; - } else { - // Write is very big. Let's do it all at once. - output.write(value, offset, length); - } - totalBytesWritten += length; - } + /** + * Write a {@code bytes} field to the stream. This method will write all + * content of the ByteBuffer regardless of the current position and limit + * (i.e., the number of bytes to be written is value.capacity(), not + * value.remaining()). Furthermore, this method doesn't alter the state of + * the passed-in ByteBuffer. Its position, limit, mark, etc. will remain + * unchanged. If you only want to write the remaining bytes of a ByteBuffer, + * you can call {@code writeByteBufferNoTag(byteBuffer.slice())}. + */ + private void writeByteBufferNoTag(final ByteBuffer value) throws IOException { + writeRawVarint32(value.capacity()); + writeRawBytes(value); } /** Write part of a byte string. */ - public void writeRawBytes(final ByteString value, int offset, int length) - throws IOException { + private void writeRawBytes(final ByteString value, int offset, int length) throws IOException { if (limit - position >= length) { // We have room in the current buffer. value.copyTo(buffer, offset, position, length); @@ -1210,21 +1181,57 @@ public final class CodedOutputStream { } } - /** Encode and write a tag. */ - public void writeTag(final int fieldNumber, final int wireType) - throws IOException { - writeRawVarint32(WireFormat.makeTag(fieldNumber, wireType)); + // ================================================================= + + /** + * Write a {@code group} field, including tag, to the stream. + * + * @deprecated groups are deprecated. 
+ */ + @Deprecated + public void writeGroup(final int fieldNumber, final MessageLite value) throws IOException { + writeTag(fieldNumber, WireFormat.WIRETYPE_START_GROUP); + writeGroupNoTag(value); + writeTag(fieldNumber, WireFormat.WIRETYPE_END_GROUP); } - /** Compute the number of bytes that would be needed to encode a tag. */ - public static int computeTagSize(final int fieldNumber) { - return computeRawVarint32Size(WireFormat.makeTag(fieldNumber, 0)); + /** + * Write a {@code group} field to the stream. + * + * @deprecated groups are deprecated. + */ + @Deprecated + public void writeGroupNoTag(final MessageLite value) throws IOException { + value.writeTo(this); + } + + /** + * Compute the number of bytes that would be needed to encode a + * {@code group} field, including tag. + * + * @deprecated groups are deprecated. + */ + @Deprecated + public static int computeGroupSize(final int fieldNumber, final MessageLite value) { + return computeTagSize(fieldNumber) * 2 + computeGroupSizeNoTag(value); + } + + /** + * Compute the number of bytes that would be needed to encode a + * {@code group} field. + */ + @Deprecated + public static int computeGroupSizeNoTag(final MessageLite value) { + return value.getSerializedSize(); } /** * Encode and write a varint. {@code value} is treated as * unsigned, so it won't be sign-extended if negative. + * + * @deprecated use {@link #writeUInt32NoTag} instead. */ + @Deprecated public void writeRawVarint32(int value) throws IOException { while (true) { if ((value & ~0x7F) == 0) { @@ -1238,95 +1245,104 @@ public final class CodedOutputStream { } /** - * Compute the number of bytes that would be needed to encode a varint. - * {@code value} is treated as unsigned, so it won't be sign-extended if - * negative. + * Encode and write a varint. + * + * @deprecated use {@link #writeUInt64NoTag} instead. 
*/ - public static int computeRawVarint32Size(final int value) { - if ((value & (~0 << 7)) == 0) return 1; - if ((value & (~0 << 14)) == 0) return 2; - if ((value & (~0 << 21)) == 0) return 3; - if ((value & (~0 << 28)) == 0) return 4; - return 5; - } - - /** Encode and write a varint. */ + @Deprecated public void writeRawVarint64(long value) throws IOException { while (true) { if ((value & ~0x7FL) == 0) { - writeRawByte((int)value); + writeRawByte((int) value); return; } else { - writeRawByte(((int)value & 0x7F) | 0x80); + writeRawByte(((int) value & 0x7F) | 0x80); value >>>= 7; } } } - /** Compute the number of bytes that would be needed to encode a varint. */ + /** + * Compute the number of bytes that would be needed to encode a varint. + * {@code value} is treated as unsigned, so it won't be sign-extended if + * negative. + * + * @deprecated use {@link #computeUInt32SizeNoTag(int)} instead. + */ + @Deprecated + public static int computeRawVarint32Size(final int value) { + if ((value & (~0 << 7)) == 0) { + return 1; + } + if ((value & (~0 << 14)) == 0) { + return 2; + } + if ((value & (~0 << 21)) == 0) { + return 3; + } + if ((value & (~0 << 28)) == 0) { + return 4; + } + return 5; + } + + /** + * Compute the number of bytes that would be needed to encode a varint. + * + * @deprecated use {@link #computeUInt64SizeNoTag(long)} instead. + */ + @Deprecated public static int computeRawVarint64Size(long value) { // handle two popular special cases up front ... - if ((value & (~0L << 7)) == 0L) return 1; - if (value < 0L) return 10; + if ((value & (~0L << 7)) == 0L) { + return 1; + } + if (value < 0L) { + return 10; + } // ... 
leaving us with 8 remaining, which we can divide and conquer int n = 2; - if ((value & (~0L << 35)) != 0L) { n += 4; value >>>= 28; } - if ((value & (~0L << 21)) != 0L) { n += 2; value >>>= 14; } - if ((value & (~0L << 14)) != 0L) { n += 1; } + if ((value & (~0L << 35)) != 0L) { + n += 4; + value >>>= 28; + } + if ((value & (~0L << 21)) != 0L) { + n += 2; + value >>>= 14; + } + if ((value & (~0L << 14)) != 0L) { + n += 1; + } return n; } - /** Write a little-endian 32-bit integer. */ - public void writeRawLittleEndian32(final int value) throws IOException { - writeRawByte((value ) & 0xFF); - writeRawByte((value >> 8) & 0xFF); - writeRawByte((value >> 16) & 0xFF); - writeRawByte((value >> 24) & 0xFF); - } - - public static final int LITTLE_ENDIAN_32_SIZE = 4; - - /** Write a little-endian 64-bit integer. */ - public void writeRawLittleEndian64(final long value) throws IOException { - writeRawByte((int)(value ) & 0xFF); - writeRawByte((int)(value >> 8) & 0xFF); - writeRawByte((int)(value >> 16) & 0xFF); - writeRawByte((int)(value >> 24) & 0xFF); - writeRawByte((int)(value >> 32) & 0xFF); - writeRawByte((int)(value >> 40) & 0xFF); - writeRawByte((int)(value >> 48) & 0xFF); - writeRawByte((int)(value >> 56) & 0xFF); - } - - public static final int LITTLE_ENDIAN_64_SIZE = 8; - /** - * Encode a ZigZag-encoded 32-bit value. ZigZag encodes signed integers - * into values that can be efficiently encoded with varint. (Otherwise, - * negative values must be sign-extended to 64 bits to be varint encoded, - * thus always taking 10 bytes on the wire.) + * Write a little-endian 32-bit integer. * - * @param n A signed 32-bit integer. - * @return An unsigned 32-bit integer, stored in a signed int because - * Java has no explicit unsigned support. + * @deprecated Use {@link #writeFixed32NoTag} instead. 
*/ - public static int encodeZigZag32(final int n) { - // Note: the right-shift must be arithmetic - return (n << 1) ^ (n >> 31); + @Deprecated + public void writeRawLittleEndian32(final int value) throws IOException { + writeRawByte((value) & 0xFF); + writeRawByte((value >> 8) & 0xFF); + writeRawByte((value >> 16) & 0xFF); + writeRawByte((value >> 24) & 0xFF); } /** - * Encode a ZigZag-encoded 64-bit value. ZigZag encodes signed integers - * into values that can be efficiently encoded with varint. (Otherwise, - * negative values must be sign-extended to 64 bits to be varint encoded, - * thus always taking 10 bytes on the wire.) + * Write a little-endian 64-bit integer. * - * @param n A signed 64-bit integer. - * @return An unsigned 64-bit integer, stored in a signed int because - * Java has no explicit unsigned support. + * @deprecated Use {@link #writeFixed64NoTag} instead. */ - public static long encodeZigZag64(final long n) { - // Note: the right-shift must be arithmetic - return (n << 1) ^ (n >> 63); + @Deprecated + public void writeRawLittleEndian64(final long value) throws IOException { + writeRawByte((int) (value) & 0xFF); + writeRawByte((int) (value >> 8) & 0xFF); + writeRawByte((int) (value >> 16) & 0xFF); + writeRawByte((int) (value >> 24) & 0xFF); + writeRawByte((int) (value >> 32) & 0xFF); + writeRawByte((int) (value >> 40) & 0xFF); + writeRawByte((int) (value >> 48) & 0xFF); + writeRawByte((int) (value >> 56) & 0xFF); } } diff --git a/java/core/src/main/java/com/google/protobuf/Descriptors.java b/java/core/src/main/java/com/google/protobuf/Descriptors.java index 5e15cfbe..e303e138 100644 --- a/java/core/src/main/java/com/google/protobuf/Descriptors.java +++ b/java/core/src/main/java/com/google/protobuf/Descriptors.java @@ -272,7 +272,7 @@ public final class Descriptors { * because a field has an undefined type or because two messages * were defined with the same name. 
*/ - private static FileDescriptor buildFrom( + public static FileDescriptor buildFrom( final FileDescriptorProto proto, final FileDescriptor[] dependencies, final boolean allowUnknownDependencies) throws DescriptorValidationException { @@ -1123,7 +1123,7 @@ public final class Descriptors { private JavaType javaType; public FieldDescriptorProto.Type toProto() { - return FieldDescriptorProto.Type.valueOf(ordinal() + 1); + return FieldDescriptorProto.Type.forNumber(ordinal() + 1); } public JavaType getJavaType() { return javaType; } diff --git a/java/core/src/main/java/com/google/protobuf/ExperimentalApi.java b/java/core/src/main/java/com/google/protobuf/ExperimentalApi.java index 6f41fb81..3cd4c884 100644 --- a/java/core/src/main/java/com/google/protobuf/ExperimentalApi.java +++ b/java/core/src/main/java/com/google/protobuf/ExperimentalApi.java @@ -1,3 +1,33 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + package com.google.protobuf; import java.lang.annotation.Documented; diff --git a/java/core/src/main/java/com/google/protobuf/GeneratedMessage.java b/java/core/src/main/java/com/google/protobuf/GeneratedMessage.java index ceb97a4e..a50afe55 100644 --- a/java/core/src/main/java/com/google/protobuf/GeneratedMessage.java +++ b/java/core/src/main/java/com/google/protobuf/GeneratedMessage.java @@ -1019,7 +1019,9 @@ public abstract class GeneratedMessage extends AbstractMessage verifyContainingType(field); final Object value = extensions.getField(field); if (value == null) { - if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { + if (field.isRepeated()) { + return Collections.emptyList(); + } else if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { // Lacking an ExtensionRegistry, we have no way to determine the // extension's real type, so we return a DynamicMessage. 
return DynamicMessage.getDefaultInstance(field.getMessageType()); diff --git a/java/core/src/main/java/com/google/protobuf/GeneratedMessageLite.java b/java/core/src/main/java/com/google/protobuf/GeneratedMessageLite.java index 81e1862c..12a1472d 100644 --- a/java/core/src/main/java/com/google/protobuf/GeneratedMessageLite.java +++ b/java/core/src/main/java/com/google/protobuf/GeneratedMessageLite.java @@ -30,6 +30,7 @@ package com.google.protobuf; +import com.google.protobuf.AbstractMessageLite.Builder.LimitedInputStream; import com.google.protobuf.Internal.BooleanList; import com.google.protobuf.Internal.DoubleList; import com.google.protobuf.Internal.FloatList; @@ -39,6 +40,7 @@ import com.google.protobuf.Internal.ProtobufList; import com.google.protobuf.WireFormat.FieldType; import java.io.IOException; +import java.io.InputStream; import java.io.ObjectStreamException; import java.io.Serializable; import java.lang.reflect.InvocationTargetException; @@ -57,10 +59,7 @@ import java.util.Map; public abstract class GeneratedMessageLite< MessageType extends GeneratedMessageLite<MessageType, BuilderType>, BuilderType extends GeneratedMessageLite.Builder<MessageType, BuilderType>> - extends AbstractMessageLite - implements Serializable { - - private static final long serialVersionUID = 1L; + extends AbstractMessageLite { /** For use by generated code only. Lazily initialized to reduce allocations. */ protected UnknownFieldSetLite unknownFields = null; @@ -83,6 +82,24 @@ public abstract class GeneratedMessageLite< return (BuilderType) dynamicMethod(MethodToInvoke.NEW_BUILDER); } + /** + * A reflective toString function. This is primarily intended as a developer aid, while keeping + * binary size down. The first line of the {@code toString()} representation includes a commented + * version of {@code super.toString()} to act as an indicator that this should not be relied on + * for comparisons. 
+ * <p> + * NOTE: This method relies on the field getter methods not being stripped or renamed by proguard. + * If they are, the fields will not be included in the returned string representation. + * <p> + * NOTE: This implementation is liable to change in the future, and should not be relied on in + * code. + */ + @Override + public String toString() { + return MessageLiteToString.toString(this, super.toString()); + } + + // The general strategy for unknown fields is to use an UnknownFieldSetLite that is treated as // mutable during the parsing constructor and immutable after. This allows us to avoid // any unnecessary intermediary allocations while reducing the generated code size. @@ -303,10 +320,9 @@ public abstract class GeneratedMessageLite< throws java.io.IOException { MessageType parsedMessage = null; try { - parsedMessage = - (MessageType) getDefaultInstanceForType().getParserForType().parsePartialFrom( - input, extensionRegistry); - } catch (com.google.protobuf.InvalidProtocolBufferException e) { + parsedMessage = parsePartialFrom( + (MessageType) getDefaultInstanceForType(), input, extensionRegistry); + } catch (InvalidProtocolBufferException e) { parsedMessage = (MessageType) e.getUnfinishedMessage(); throw e; } finally { @@ -562,7 +578,6 @@ public abstract class GeneratedMessageLite< return extensions.isInitialized(); } - @Override protected final void doneParsing() { super.doneParsing(); @@ -1049,7 +1064,12 @@ public abstract class GeneratedMessageLite< * A serialized (serializable) form of the generated message. Stores the * message as a class name and a byte array. 
*/ - static final class SerializedForm implements Serializable { + protected static final class SerializedForm implements Serializable { + + public static SerializedForm of(MessageLite message) { + return new SerializedForm(message); + } + private static final long serialVersionUID = 0L; private final String messageClassName; @@ -1093,16 +1113,6 @@ public abstract class GeneratedMessageLite< } } } - - /** - * Replaces this object in the output stream with a serialized form. - * Part of Java's serialization magic. Generated sub-classes must override - * this method by calling {@code return super.writeReplace();} - * @return a SerializedForm of this message - */ - protected Object writeReplace() throws ObjectStreamException { - return new SerializedForm(this); - } /** * Checks that the {@link Extension} is Lite and returns it as a @@ -1135,45 +1145,6 @@ public abstract class GeneratedMessageLite< message.dynamicMethod(MethodToInvoke.MAKE_IMMUTABLE); } - /** - * A static helper method for parsing a partial from input using the extension registry and the - * instance. - */ - static <T extends GeneratedMessageLite<T, ?>> T parsePartialFrom( - T instance, CodedInputStream input, ExtensionRegistryLite extensionRegistry) - throws InvalidProtocolBufferException { - try { - return (T) instance.dynamicMethod( - MethodToInvoke.PARSE_PARTIAL_FROM, input, extensionRegistry); - } catch (RuntimeException e) { - if (e.getCause() instanceof InvalidProtocolBufferException) { - throw (InvalidProtocolBufferException) e.getCause(); - } - throw e; - } - } - - /** - * A {@link Parser} implementation that delegates to the default instance. - * <p> - * For use by generated code only. 
- */ - protected static class DefaultInstanceBasedParser<T extends GeneratedMessageLite<T, ?>> - extends AbstractParser<T> { - - private T defaultInstance; - - public DefaultInstanceBasedParser(T defaultInstance) { - this.defaultInstance = defaultInstance; - } - - @Override - public T parsePartialFrom(CodedInputStream input, ExtensionRegistryLite extensionRegistry) - throws InvalidProtocolBufferException { - return GeneratedMessageLite.parsePartialFrom(defaultInstance, input, extensionRegistry); - } - } - protected static IntList newIntList() { return new IntArrayList(); } @@ -1269,8 +1240,218 @@ public abstract class GeneratedMessageLite< protected static <E> ProtobufList<E> emptyProtobufList() { return ProtobufArrayList.emptyList(); } - + protected static LazyStringArrayList emptyLazyStringArrayList() { return LazyStringArrayList.emptyList(); } + + /** + * A {@link Parser} implementation that delegates to the default instance. + * <p> + * For use by generated code only. + */ + protected static class DefaultInstanceBasedParser<T extends GeneratedMessageLite<T, ?>> + extends AbstractParser<T> { + + private T defaultInstance; + + public DefaultInstanceBasedParser(T defaultInstance) { + this.defaultInstance = defaultInstance; + } + + @Override + public T parsePartialFrom(CodedInputStream input, ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException { + return GeneratedMessageLite.parsePartialFrom(defaultInstance, input, extensionRegistry); + } + } + + /** + * A static helper method for parsing a partial from input using the extension registry and the + * instance. + */ + // TODO(dweis): Should this verify that the last tag was 0? 
+ static <T extends GeneratedMessageLite<T, ?>> T parsePartialFrom( + T instance, CodedInputStream input, ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException { + T result; + try { + result = (T) instance.dynamicMethod( + MethodToInvoke.PARSE_PARTIAL_FROM, input, extensionRegistry); + } catch (RuntimeException e) { + if (e.getCause() instanceof InvalidProtocolBufferException) { + throw (InvalidProtocolBufferException) e.getCause(); + } + throw e; + } + return result; + } + + protected static <T extends GeneratedMessageLite<T, ?>> T parsePartialFrom( + T defaultInstance, + CodedInputStream input) + throws InvalidProtocolBufferException { + return parsePartialFrom(defaultInstance, input, ExtensionRegistryLite.getEmptyRegistry()); + } + + /** + * Helper method to check if message is initialized. + * + * @throws InvalidProtocolBufferException if it is not initialized. + * @return The message to check. + */ + private static <T extends GeneratedMessageLite<T, ?>> T checkMessageInitialized(T message) + throws InvalidProtocolBufferException { + if (message != null && !message.isInitialized()) { + throw message.newUninitializedMessageException() + .asInvalidProtocolBufferException() + .setUnfinishedMessage(message); + } + return message; + } + + // Validates last tag. + protected static <T extends GeneratedMessageLite<T, ?>> T parseFrom( + T defaultInstance, ByteString data) + throws InvalidProtocolBufferException { + return checkMessageInitialized( + parseFrom(defaultInstance, data, ExtensionRegistryLite.getEmptyRegistry())); + } + + // Validates last tag. + protected static <T extends GeneratedMessageLite<T, ?>> T parseFrom( + T defaultInstance, ByteString data, ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException { + return checkMessageInitialized(parsePartialFrom(defaultInstance, data, extensionRegistry)); + } + + // This is a special case since we want to verify that the last tag is 0. 
We assume we exhaust the + // ByteString. + private static <T extends GeneratedMessageLite<T, ?>> T parsePartialFrom( + T defaultInstance, ByteString data, ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException { + T message; + try { + CodedInputStream input = data.newCodedInput(); + message = parsePartialFrom(defaultInstance, input, extensionRegistry); + try { + input.checkLastTagWas(0); + } catch (InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(message); + } + return message; + } catch (InvalidProtocolBufferException e) { + throw e; + } + } + + // This is a special case since we want to verify that the last tag is 0. We assume we exhaust the + // ByteString. + private static <T extends GeneratedMessageLite<T, ?>> T parsePartialFrom( + T defaultInstance, byte[] data, ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException { + T message; + try { + CodedInputStream input = CodedInputStream.newInstance(data); + message = parsePartialFrom(defaultInstance, input, extensionRegistry); + try { + input.checkLastTagWas(0); + } catch (InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(message); + } + return message; + } catch (InvalidProtocolBufferException e) { + throw e; + } + } + + // Validates last tag. + protected static <T extends GeneratedMessageLite<T, ?>> T parseFrom( + T defaultInstance, byte[] data) + throws InvalidProtocolBufferException { + return checkMessageInitialized( + parsePartialFrom(defaultInstance, data, ExtensionRegistryLite.getEmptyRegistry())); + } + + // Validates last tag. + protected static <T extends GeneratedMessageLite<T, ?>> T parseFrom( + T defaultInstance, byte[] data, ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException { + return checkMessageInitialized(parsePartialFrom(defaultInstance, data, extensionRegistry)); + } + + // Does not validate last tag. 
+ protected static <T extends GeneratedMessageLite<T, ?>> T parseFrom( + T defaultInstance, InputStream input) + throws InvalidProtocolBufferException { + return checkMessageInitialized( + parsePartialFrom(defaultInstance, CodedInputStream.newInstance(input), + ExtensionRegistryLite.getEmptyRegistry())); + } + + // Does not validate last tag. + protected static <T extends GeneratedMessageLite<T, ?>> T parseFrom( + T defaultInstance, InputStream input, ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException { + return checkMessageInitialized( + parsePartialFrom(defaultInstance, CodedInputStream.newInstance(input), extensionRegistry)); + } + + // Does not validate last tag. + protected static <T extends GeneratedMessageLite<T, ?>> T parseFrom( + T defaultInstance, CodedInputStream input) + throws InvalidProtocolBufferException { + return parseFrom(defaultInstance, input, ExtensionRegistryLite.getEmptyRegistry()); + } + + // Does not validate last tag. + protected static <T extends GeneratedMessageLite<T, ?>> T parseFrom( + T defaultInstance, CodedInputStream input, ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException { + return checkMessageInitialized( + parsePartialFrom(defaultInstance, input, extensionRegistry)); + } + + // Validates last tag. + protected static <T extends GeneratedMessageLite<T, ?>> T parseDelimitedFrom( + T defaultInstance, InputStream input) + throws InvalidProtocolBufferException { + return checkMessageInitialized( + parsePartialDelimitedFrom(defaultInstance, input, + ExtensionRegistryLite.getEmptyRegistry())); + } + + // Validates last tag. 
+ protected static <T extends GeneratedMessageLite<T, ?>> T parseDelimitedFrom( + T defaultInstance, InputStream input, ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException { + return checkMessageInitialized( + parsePartialDelimitedFrom(defaultInstance, input, extensionRegistry)); + } + + private static <T extends GeneratedMessageLite<T, ?>> T parsePartialDelimitedFrom( + T defaultInstance, + InputStream input, + ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException { + int size; + try { + int firstByte = input.read(); + if (firstByte == -1) { + return null; + } + size = CodedInputStream.readRawVarint32(firstByte, input); + } catch (IOException e) { + throw new InvalidProtocolBufferException(e.getMessage()); + } + InputStream limitedInput = new LimitedInputStream(input, size); + CodedInputStream codedInput = CodedInputStream.newInstance(limitedInput); + T message = parsePartialFrom(defaultInstance, codedInput, extensionRegistry); + try { + codedInput.checkLastTagWas(0); + } catch (InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(message); + } + return message; + } } diff --git a/java/core/src/main/java/com/google/protobuf/Internal.java b/java/core/src/main/java/com/google/protobuf/Internal.java index e19b6dca..abf7ddd6 100644 --- a/java/core/src/main/java/com/google/protobuf/Internal.java +++ b/java/core/src/main/java/com/google/protobuf/Internal.java @@ -51,10 +51,12 @@ import java.util.Set; * * @author kenton@google.com (Kenton Varda) */ -public class Internal { +public final class Internal { - protected static final Charset UTF_8 = Charset.forName("UTF-8"); - protected static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1"); + private Internal() {} + + static final Charset UTF_8 = Charset.forName("UTF-8"); + static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1"); /** * Helper called by generated code to construct default values for string @@ -406,6 +408,7 @@ public class 
Internal { public static final CodedInputStream EMPTY_CODED_INPUT_STREAM = CodedInputStream.newInstance(EMPTY_BYTE_ARRAY); + /** * Provides an immutable view of {@code List<T>} around a {@code List<F>}. * diff --git a/java/core/src/main/java/com/google/protobuf/LazyField.java b/java/core/src/main/java/com/google/protobuf/LazyField.java index 5e0a485c..3da8b900 100644 --- a/java/core/src/main/java/com/google/protobuf/LazyField.java +++ b/java/core/src/main/java/com/google/protobuf/LazyField.java @@ -39,14 +39,14 @@ import java.util.Map.Entry; * * Most of key methods are implemented in {@link LazyFieldLite} but this class * can contain default instance of the message to provide {@code hashCode()}, - * {@code equals()} and {@code toString()}. + * {@code euqals()} and {@code toString()}. * * @author xiangl@google.com (Xiang Li) */ public class LazyField extends LazyFieldLite { /** - * Carry a message's default instance which is used by {@code hashCode()}, {@code equals()} and + * Carry a message's default instance which is used by {@code hashCode()}, {@code euqals()} and * {@code toString()}. */ private final MessageLite defaultInstance; diff --git a/java/core/src/main/java/com/google/protobuf/LazyFieldLite.java b/java/core/src/main/java/com/google/protobuf/LazyFieldLite.java index eea1fe3c..016ec20d 100644 --- a/java/core/src/main/java/com/google/protobuf/LazyFieldLite.java +++ b/java/core/src/main/java/com/google/protobuf/LazyFieldLite.java @@ -30,14 +30,26 @@ package com.google.protobuf; +import java.io.IOException; + /** * LazyFieldLite encapsulates the logic of lazily parsing message fields. It stores - * the message in a ByteString initially and then parse it on-demand. + * the message in a ByteString initially and then parses it on-demand. + * + * LazyFieldLite is thread-compatible: concurrent reads are safe once the proto that this + * LazyFieldLite is a part of is no longer being mutated by its Builder. 
However, explicit + * synchronization is needed under read/write situations. * - * LazyField is thread-compatible e.g. concurrent read are safe, however, - * synchronizations are needed under read/write situations. + * When a LazyFieldLite is used in the context of a MessageLite object, its behavior is considered + * to be immutable and none of the setter methods in its API are expected to be invoked. All of the + * getters are expected to be thread-safe. When used in the context of a MessageLite.Builder, + * setters can be invoked, but there is no guarantee of thread safety. + * + * TODO(yatin,dweis): Consider splitting this class's functionality and put the mutable methods + * into a separate builder class to allow us to give stronger compile-time guarantees. * - * This class is internal implementation detail, so you don't need to use it directly. + * This class is internal implementation detail of the protobuf library, so you don't need to use it + * directly. * * @author xiangl@google.com (Xiang Li) */ @@ -46,8 +58,34 @@ public class LazyFieldLite { ExtensionRegistryLite.getEmptyRegistry(); /** - * A delayed-parsed version of the bytes. When this is non-null then {@code extensionRegistry } is - * also non-null and {@code value} and {@code memoizedBytes} are null. + * The value associated with the LazyFieldLite object is stored in one or more of the following + * three fields (delayedBytes, value, memoizedBytes). They should together be interpreted as + * follows. + * 1) delayedBytes can be non-null, while value and memoizedBytes is null. The object will be in + * this state while the value for the object has not yet been parsed. + * 2) Both delayedBytes and value are non-null. The object transitions to this state as soon as + * some caller needs to access the value (by invoking getValue()). + * 3) memoizedBytes is merely an optimization for calls to LazyFieldLite.toByteString() to avoid + * recomputing the ByteString representation on each call. 
Instead, when the value is parsed + * from delayedBytes, we will also assign the contents of delayedBytes to memoizedBytes (since + * that is the ByteString representation of value). + * 4) Finally, if the LazyFieldLite was created directly with a parsed MessageLite value, then + * delayedBytes will be null, and memoizedBytes will be initialized only upon the first call to + * LazyFieldLite.toByteString(). + * + * Given the above conditions, any caller that needs a serialized representation of this object + * must first check if the memoizedBytes or delayedBytes ByteString is non-null and use it + * directly; if both of those are null, it can look at the parsed value field. Similarly, any + * caller that needs a parsed value must first check if the value field is already non-null, if + * not it must parse the value from delayedBytes. + */ + + /** + * A delayed-parsed version of the contents of this field. When this field is non-null, then the + * "value" field is allowed to be null until the time that the value needs to be read. + * + * When delayedBytes is non-null then {@code extensionRegistry} is required to also be non-null. + * {@code value} and {@code memoizedBytes} will be initialized lazily. */ private ByteString delayedBytes; @@ -60,12 +98,15 @@ public class LazyFieldLite { private ExtensionRegistryLite extensionRegistry; /** - * The parsed value. When this is non-null then {@code delayedBytes} will be null. + * The parsed value. When this is null and a caller needs access to the MessageLite value, then + * {@code delayedBytes} will be parsed lazily at that time. */ protected volatile MessageLite value; /** - * The memoized bytes for {@code value}. Will be null when {@code value} is null. + * The memoized bytes for {@code value}. This is an optimization for the toByteString() method to + * not have to recompute its return-value on each invocation. + * TODO(yatin): Figure out whether this optimization is actually necessary. 
*/ private volatile ByteString memoizedBytes; @@ -230,6 +271,46 @@ public class LazyFieldLite { return; } } + + /** + * Merges another instance's contents from a stream. + * + * <p>LazyField is not thread-safe for write access. Synchronizations are needed + * under read/write situations. + */ + public void mergeFrom(CodedInputStream input, ExtensionRegistryLite extensionRegistry) + throws IOException { + if (this.containsDefaultInstance()) { + setByteString(input.readBytes(), extensionRegistry); + return; + } + + // If the other field has an extension registry but this does not, copy over the other extension + // registry. + if (this.extensionRegistry == null) { + this.extensionRegistry = extensionRegistry; + } + + // In the case that both of them are not parsed we simply concatenate the bytes to save time. In + // the (probably rare) case that they have different extension registries there is a chance that + // some of the extensions may be dropped, but the tradeoff of making this operation fast seems + // to outweigh the benefits of combining the extension registries, which is not normally done for + // lite protos anyways. + if (this.delayedBytes != null) { + setByteString(this.delayedBytes.concat(input.readBytes()), this.extensionRegistry); + return; + } + + // We are parsed and both contain data. We won't drop any extensions here directly, but in the + // case that the extension registries are not the same then we might in the future if we + // need to serialize and parse a message again. + try { + setValue(value.toBuilder().mergeFrom(input, extensionRegistry).build()); + } catch (InvalidProtocolBufferException e) { + // Nothing is logged and no exceptions are thrown. Clients will be unaware that a proto + // was invalid. + } + } private static MessageLite mergeValueAndBytes( MessageLite value, ByteString otherBytes, ExtensionRegistryLite extensionRegistry) { @@ -259,10 +340,10 @@ public class LazyFieldLite { * parsed. Be careful when using this method. 
*/ public int getSerializedSize() { - if (delayedBytes != null) { - return delayedBytes.size(); - } else if (memoizedBytes != null) { + if (memoizedBytes != null) { return memoizedBytes.size(); + } else if (delayedBytes != null) { + return delayedBytes.size(); } else if (value != null) { return value.getSerializedSize(); } else { @@ -274,12 +355,12 @@ public class LazyFieldLite { * Returns a BytesString for this field in a thread-safe way. */ public ByteString toByteString() { - if (delayedBytes != null) { - return delayedBytes; - } if (memoizedBytes != null) { return memoizedBytes; } + if (delayedBytes != null) { + return delayedBytes; + } synchronized (this) { if (memoizedBytes != null) { return memoizedBytes; @@ -311,18 +392,15 @@ public class LazyFieldLite { .parseFrom(delayedBytes, extensionRegistry); this.value = parsedValue; this.memoizedBytes = delayedBytes; - this.delayedBytes = null; } else { this.value = defaultInstance; this.memoizedBytes = ByteString.EMPTY; - this.delayedBytes = null; } } catch (InvalidProtocolBufferException e) { // Nothing is logged and no exceptions are thrown. Clients will be unaware that this proto // was invalid. 
this.value = defaultInstance; this.memoizedBytes = ByteString.EMPTY; - this.delayedBytes = null; } } } diff --git a/java/core/src/main/java/com/google/protobuf/LazyStringArrayList.java b/java/core/src/main/java/com/google/protobuf/LazyStringArrayList.java index c3be3cca..68c430cf 100644 --- a/java/core/src/main/java/com/google/protobuf/LazyStringArrayList.java +++ b/java/core/src/main/java/com/google/protobuf/LazyStringArrayList.java @@ -30,12 +30,12 @@ package com.google.protobuf; -import java.util.Arrays; -import java.util.List; import java.util.AbstractList; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.List; import java.util.RandomAccess; /** diff --git a/java/core/src/main/java/com/google/protobuf/MessageLiteToString.java b/java/core/src/main/java/com/google/protobuf/MessageLiteToString.java index e69de29b..2a6e0e30 100644 --- a/java/core/src/main/java/com/google/protobuf/MessageLiteToString.java +++ b/java/core/src/main/java/com/google/protobuf/MessageLiteToString.java @@ -0,0 +1,200 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package com.google.protobuf; + +import java.lang.reflect.Field; +import java.lang.reflect.Method; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +/** + * Helps generate {@link String} representations of {@link MessageLite} protos. + */ +final class MessageLiteToString { + /** + * Suffix for *_FIELD_NUMBER fields. This is used to reflectively detect proto fields that should + * be toString()ed. + */ + private static final String FIELD_NUMBER_NAME_SUFFIX = "_FIELD_NUMBER"; + + /** + * Returns a {@link String} representation of the {@link MessageLite} object. The first line of + * the {@code String} representation includes a comment string to uniquely identify + * the object instance. This acts as an indicator that this should not be relied on for + * comparisons. + * + * <p>For use by generated code only. 
+ */ + static String toString(MessageLite messageLite, String commentString) { + StringBuilder buffer = new StringBuilder(); + buffer.append("# ").append(commentString); + reflectivePrintWithIndent(messageLite, buffer, 0); + return buffer.toString(); + } + + /** + * Reflectively prints the {@link MessageLite} to the buffer at given {@code indent} level. + * + * @param buffer the buffer to write to + * @param indent the number of spaces to indent the proto by + */ + private static void reflectivePrintWithIndent( + MessageLite messageLite, StringBuilder buffer, int indent) { + // Build a map of method name to method. We're looking for methods like getFoo(), hasFoo(), and + // getFooList() which might be useful for building an object's string representation. + Map<String, Method> nameToNoArgMethod = new HashMap<String, Method>(); + for (Method method : messageLite.getClass().getDeclaredMethods()) { + if (method.getParameterTypes().length == 0) { + nameToNoArgMethod.put(method.getName(), method); + } + } + + for (Field field : messageLite.getClass().getDeclaredFields()) { + String fieldName = field.getName(); + // Skip all fields that aren't in a format like "FOO_BAR_FIELD_NUMBER" + if (!fieldName.endsWith(FIELD_NUMBER_NAME_SUFFIX)) { + continue; + } + + // For "FOO_BAR_FIELD_NUMBER" this would be "FOO_BAR" + String upperUnderscore = + fieldName.substring(0, fieldName.length() - FIELD_NUMBER_NAME_SUFFIX.length()); + + // For "FOO_BAR_FIELD_NUMBER" this would be "FooBar" + String upperCamelCaseName = upperUnderscoreToUpperCamel(upperUnderscore); + + // Try to reflectively get the value and toString() the field as if it were optional. This + // only works if the method names have not been proguarded out or renamed. 
+ Method getMethod = nameToNoArgMethod.get("get" + upperCamelCaseName); + Method hasMethod = nameToNoArgMethod.get("has" + upperCamelCaseName); + if (getMethod != null && hasMethod != null) { + if ((Boolean) GeneratedMessageLite.invokeOrDie(hasMethod, messageLite)) { + printField( + buffer, + indent, + upperUnderscore.toLowerCase(), + GeneratedMessageLite.invokeOrDie(getMethod, messageLite)); + } + continue; + } + + // Try to reflectively get the value and toString() the field as if it were repeated. This + // only works if the method names have not been proguarded out or renamed. + Method listMethod = nameToNoArgMethod.get("get" + upperCamelCaseName + "List"); + if (listMethod != null) { + printField( + buffer, + indent, + upperUnderscore.toLowerCase(), + GeneratedMessageLite.invokeOrDie(listMethod, messageLite)); + continue; + } + } + + if (messageLite instanceof GeneratedMessageLite.ExtendableMessage) { + Iterator<Map.Entry<GeneratedMessageLite.ExtensionDescriptor, Object>> iter = + ((GeneratedMessageLite.ExtendableMessage<?, ?>) messageLite).extensions.iterator(); + while (iter.hasNext()) { + Map.Entry<GeneratedMessageLite.ExtensionDescriptor, Object> entry = iter.next(); + printField(buffer, indent, "[" + entry.getKey().getNumber() + "]", entry.getValue()); + } + } + + if (((GeneratedMessageLite) messageLite).unknownFields != null) { + ((GeneratedMessageLite) messageLite).unknownFields.printWithIndent(buffer, indent); + } + } + + /** + * Formats a text proto field. + * + * <p>For use by generated code only. 
+ * + * @param buffer the buffer to write to + * @param indent the number of spaces the proto should be indented by + * @param name the field name (in lower underscore case) + * @param object the object value of the field + */ + static final void printField(StringBuilder buffer, int indent, String name, Object object) { + if (object instanceof List<?>) { + List<?> list = (List<?>) object; + for (Object entry : list) { + printField(buffer, indent, name, entry); + } + return; + } + + buffer.append('\n'); + for (int i = 0; i < indent; i++) { + buffer.append(' '); + } + buffer.append(name); + + if (object instanceof String) { + buffer.append(": \"").append(TextFormatEscaper.escapeText((String) object)).append('"'); + } else if (object instanceof ByteString) { + buffer.append(": \"").append(TextFormatEscaper.escapeBytes((ByteString) object)).append('"'); + } else if (object instanceof GeneratedMessageLite) { + buffer.append(" {"); + reflectivePrintWithIndent((GeneratedMessageLite) object, buffer, indent + 2); + buffer.append("\n"); + for (int i = 0; i < indent; i++) { + buffer.append(' '); + } + buffer.append("}"); + } else { + buffer.append(": ").append(object.toString()); + } + } + + /** + * A Guava-less implementation of: + * {@code CaseFormat.UPPER_UNDERSCORE.to(CaseFormat.UPPER_CAMEL, upperUnderscore)} + */ + private static String upperUnderscoreToUpperCamel(String upperUnderscore) { + String upperCamelCaseName = ""; + boolean nextCharacterShouldBeUpper = true; + for (int i = 0; i < upperUnderscore.length(); i++) { + char ch = upperUnderscore.charAt(i); + if (ch == '_') { + nextCharacterShouldBeUpper = true; + } else if (nextCharacterShouldBeUpper){ + upperCamelCaseName += Character.toUpperCase(ch); + nextCharacterShouldBeUpper = false; + } else { + upperCamelCaseName += Character.toLowerCase(ch); + } + } + return upperCamelCaseName; + } +} diff --git a/java/core/src/main/java/com/google/protobuf/NioByteString.java 
b/java/core/src/main/java/com/google/protobuf/NioByteString.java index f71e41b2..6163c7b1 100644 --- a/java/core/src/main/java/com/google/protobuf/NioByteString.java +++ b/java/core/src/main/java/com/google/protobuf/NioByteString.java @@ -30,15 +30,14 @@ package com.google.protobuf; -import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.InvalidObjectException; import java.io.ObjectInputStream; import java.io.OutputStream; import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.nio.InvalidMarkException; -import java.nio.channels.Channels; import java.nio.charset.Charset; import java.util.Collections; import java.util.List; @@ -54,7 +53,7 @@ final class NioByteString extends ByteString.LeafByteString { throw new NullPointerException("buffer"); } - this.buffer = buffer.slice(); + this.buffer = buffer.slice().order(ByteOrder.nativeOrder()); } // ================================================================= @@ -119,7 +118,7 @@ final class NioByteString extends ByteString.LeafByteString { @Override public void writeTo(OutputStream out) throws IOException { - writeToInternal(out, buffer.position(), buffer.remaining()); + out.write(toByteArray()); } @Override @@ -137,14 +136,12 @@ final class NioByteString extends ByteString.LeafByteString { return; } - // Slow path - if (out instanceof FileOutputStream || numberToWrite >= 8192) { - // Use a channel to write out the ByteBuffer. - Channels.newChannel(out).write(slice(sourceOffset, sourceOffset + numberToWrite)); - } else { - // Just copy the data to an array and write it. 
- out.write(toByteArray()); - } + ByteBufferWriter.write(slice(sourceOffset, sourceOffset + numberToWrite), out); + } + + @Override + void writeTo(ByteOutput output) throws IOException { + output.writeLazy(buffer.slice()); } @Override @@ -159,46 +156,30 @@ final class NioByteString extends ByteString.LeafByteString { @Override protected String toStringInternal(Charset charset) { - byte[] bytes; - int offset; + final byte[] bytes; + final int offset; + final int length; if (buffer.hasArray()) { bytes = buffer.array(); offset = buffer.arrayOffset() + buffer.position(); + length = buffer.remaining(); } else { + // TODO(nathanmittler): Can we optimize this? bytes = toByteArray(); offset = 0; + length = bytes.length; } - return new String(bytes, offset, size(), charset); + return new String(bytes, offset, length, charset); } @Override public boolean isValidUtf8() { - // TODO(nathanmittler): add a ByteBuffer fork for Utf8.isValidUtf8 to avoid the copy - byte[] bytes; - int startIndex; - if (buffer.hasArray()) { - bytes = buffer.array(); - startIndex = buffer.arrayOffset() + buffer.position(); - } else { - bytes = toByteArray(); - startIndex = 0; - } - return Utf8.isValidUtf8(bytes, startIndex, startIndex + size()); + return Utf8.isValidUtf8(buffer); } @Override protected int partialIsValidUtf8(int state, int offset, int length) { - // TODO(nathanmittler): TODO add a ByteBuffer fork for Utf8.partialIsValidUtf8 to avoid the copy - byte[] bytes; - int startIndex; - if (buffer.hasArray()) { - bytes = buffer.array(); - startIndex = buffer.arrayOffset() + buffer.position(); - } else { - bytes = toByteArray(); - startIndex = 0; - } - return Utf8.partialIsValidUtf8(state, bytes, startIndex, startIndex + size()); + return Utf8.partialIsValidUtf8(state, buffer, offset, offset + length); } @Override diff --git a/java/core/src/main/java/com/google/protobuf/Parser.java b/java/core/src/main/java/com/google/protobuf/Parser.java index 3fa11c3b..6db69247 100644 --- 
a/java/core/src/main/java/com/google/protobuf/Parser.java +++ b/java/core/src/main/java/com/google/protobuf/Parser.java @@ -30,7 +30,6 @@ package com.google.protobuf; -import java.io.IOException; import java.io.InputStream; /** diff --git a/java/core/src/main/java/com/google/protobuf/RopeByteString.java b/java/core/src/main/java/com/google/protobuf/RopeByteString.java index 8badfabd..3f3e9bd1 100644 --- a/java/core/src/main/java/com/google/protobuf/RopeByteString.java +++ b/java/core/src/main/java/com/google/protobuf/RopeByteString.java @@ -48,10 +48,11 @@ import java.util.Stack; /** * Class to represent {@code ByteStrings} formed by concatenation of other * ByteStrings, without copying the data in the pieces. The concatenation is - * represented as a tree whose leaf nodes are each a {@link LiteralByteString}. + * represented as a tree whose leaf nodes are each a + * {@link com.google.protobuf.ByteString.LeafByteString}. * * <p>Most of the operation here is inspired by the now-famous paper <a - * href="http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf"> + * href="https://web.archive.org/web/20060202015456/http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf"> * BAP95 </a> Ropes: an Alternative to Strings hans-j. boehm, russ atkinson and * michael plass * @@ -139,8 +140,9 @@ final class RopeByteString extends ByteString { /** * Concatenate the given strings while performing various optimizations to * slow the growth rate of tree depth and tree node count. The result is - * either a {@link LiteralByteString} or a {@link RopeByteString} - * depending on which optimizations, if any, were applied. + * either a {@link com.google.protobuf.ByteString.LeafByteString} or a + * {@link RopeByteString} depending on which optimizations, if any, were + * applied. 
* * <p>Small pieces of length less than {@link * ByteString#CONCATENATE_BY_COPY_SIZE} may be copied by value here, as in @@ -294,8 +296,7 @@ final class RopeByteString extends ByteString { * * <p>Substrings of {@code length < 2} should result in at most a single * recursive call chain, terminating at a leaf node. Thus the result will be a - * {@link LiteralByteString}. {@link #RopeByteString(ByteString, - * ByteString)}. + * {@link com.google.protobuf.ByteString.LeafByteString}. * * @param beginIndex start at this index * @param endIndex the last character is the one before this index @@ -368,7 +369,7 @@ final class RopeByteString extends ByteString { @Override public List<ByteBuffer> asReadOnlyByteBufferList() { - // Walk through the list of LiteralByteString's that make up this + // Walk through the list of LeafByteString's that make up this // rope, and add each one as a read-only ByteBuffer. List<ByteBuffer> result = new ArrayList<ByteBuffer>(); PieceIterator pieces = new PieceIterator(this); @@ -400,6 +401,12 @@ final class RopeByteString extends ByteString { } @Override + void writeTo(ByteOutput output) throws IOException { + left.writeTo(output); + right.writeTo(output); + } + + @Override protected String toStringInternal(Charset charset) { return new String(toByteArray(), charset); } @@ -709,9 +716,10 @@ final class RopeByteString extends ByteString { } /** - * Returns the next item and advances one {@code LiteralByteString}. + * Returns the next item and advances one + * {@link com.google.protobuf.ByteString.LeafByteString}. 
* - * @return next non-empty LiteralByteString or {@code null} + * @return next non-empty LeafByteString or {@code null} */ @Override public LeafByteString next() { diff --git a/java/core/src/main/java/com/google/protobuf/SmallSortedMap.java b/java/core/src/main/java/com/google/protobuf/SmallSortedMap.java index 0674d2e2..dff19328 100644 --- a/java/core/src/main/java/com/google/protobuf/SmallSortedMap.java +++ b/java/core/src/main/java/com/google/protobuf/SmallSortedMap.java @@ -35,12 +35,12 @@ import java.util.AbstractSet; import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; -import java.util.TreeMap; import java.util.List; import java.util.Map; import java.util.NoSuchElementException; import java.util.Set; import java.util.SortedMap; +import java.util.TreeMap; /** * A custom map implementation from FieldDescriptor to Object optimized to diff --git a/java/core/src/main/java/com/google/protobuf/TextFormat.java b/java/core/src/main/java/com/google/protobuf/TextFormat.java index c99b5285..edf114fa 100644 --- a/java/core/src/main/java/com/google/protobuf/TextFormat.java +++ b/java/core/src/main/java/com/google/protobuf/TextFormat.java @@ -425,7 +425,7 @@ public final class TextFormat { case STRING: generator.print("\""); generator.print(escapeNonAscii - ? escapeText((String) value) + ? TextFormatEscaper.escapeText((String) value) : escapeDoubleQuotesAndBackslashes((String) value) .replace("\n", "\\n")); generator.print("\""); @@ -661,6 +661,14 @@ public final class TextFormat { nextToken(); } + int getLine() { + return line; + } + + int getColumn() { + return column; + } + /** Are we at the end of the input? 
*/ public boolean atEnd() { return currentToken.length() == 0; @@ -1074,7 +1082,7 @@ public final class TextFormat { private ParseException floatParseException(final NumberFormatException e) { return parseException("Couldn't parse number: " + e.getMessage()); } - + /** * Returns a {@link UnknownFieldParseException} with the line and column * numbers of the previous token in the description, and the unknown field @@ -1133,7 +1141,7 @@ public final class TextFormat { return column; } } - + /** * Thrown when encountering an unknown field while parsing * a text format message. @@ -1257,11 +1265,14 @@ public final class TextFormat { private final boolean allowUnknownFields; private final SingularOverwritePolicy singularOverwritePolicy; + private TextFormatParseInfoTree.Builder parseInfoTreeBuilder; - private Parser(boolean allowUnknownFields, - SingularOverwritePolicy singularOverwritePolicy) { + private Parser( + boolean allowUnknownFields, SingularOverwritePolicy singularOverwritePolicy, + TextFormatParseInfoTree.Builder parseInfoTreeBuilder) { this.allowUnknownFields = allowUnknownFields; this.singularOverwritePolicy = singularOverwritePolicy; + this.parseInfoTreeBuilder = parseInfoTreeBuilder; } /** @@ -1278,6 +1289,7 @@ public final class TextFormat { private boolean allowUnknownFields = false; private SingularOverwritePolicy singularOverwritePolicy = SingularOverwritePolicy.ALLOW_SINGULAR_OVERWRITES; + private TextFormatParseInfoTree.Builder parseInfoTreeBuilder = null; /** @@ -1288,8 +1300,15 @@ public final class TextFormat { return this; } + public Builder setParseInfoTreeBuilder( + TextFormatParseInfoTree.Builder parseInfoTreeBuilder) { + this.parseInfoTreeBuilder = parseInfoTreeBuilder; + return this; + } + public Parser build() { - return new Parser(allowUnknownFields, singularOverwritePolicy); + return new Parser( + allowUnknownFields, singularOverwritePolicy, parseInfoTreeBuilder); } } @@ -1380,7 +1399,21 @@ public final class TextFormat { final 
ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target) throws ParseException { + mergeField(tokenizer, extensionRegistry, target, parseInfoTreeBuilder); + } + + /** + * Parse a single field from {@code tokenizer} and merge it into + * {@code builder}. + */ + private void mergeField(final Tokenizer tokenizer, + final ExtensionRegistry extensionRegistry, + final MessageReflection.MergeTarget target, + TextFormatParseInfoTree.Builder parseTreeBuilder) + throws ParseException { FieldDescriptor field = null; + int startLine = tokenizer.getLine(); + int startColumn = tokenizer.getColumn(); final Descriptor type = target.getDescriptorForType(); ExtensionRegistry.ExtensionInfo extension = null; @@ -1472,14 +1505,51 @@ public final class TextFormat { // Handle potential ':'. if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { tokenizer.tryConsume(":"); // optional + if (parseTreeBuilder != null) { + TextFormatParseInfoTree.Builder childParseTreeBuilder = + parseTreeBuilder.getBuilderForSubMessageField(field); + consumeFieldValues(tokenizer, extensionRegistry, target, field, extension, + childParseTreeBuilder); + } else { + consumeFieldValues(tokenizer, extensionRegistry, target, field, extension, + parseTreeBuilder); + } } else { tokenizer.consume(":"); // required + consumeFieldValues( + tokenizer, extensionRegistry, target, field, extension, parseTreeBuilder); } + + if (parseTreeBuilder != null) { + parseTreeBuilder.setLocation( + field, TextFormatParseLocation.create(startLine, startColumn)); + } + + // For historical reasons, fields may optionally be separated by commas or + // semicolons. + if (!tokenizer.tryConsume(";")) { + tokenizer.tryConsume(","); + } + } + + /** + * Parse a one or more field values from {@code tokenizer} and merge it into + * {@code builder}. 
+ */ + private void consumeFieldValues( + final Tokenizer tokenizer, + final ExtensionRegistry extensionRegistry, + final MessageReflection.MergeTarget target, + final FieldDescriptor field, + final ExtensionRegistry.ExtensionInfo extension, + final TextFormatParseInfoTree.Builder parseTreeBuilder) + throws ParseException { // Support specifying repeated field values as a comma-separated list. // Ex."foo: [1, 2, 3]" if (field.isRepeated() && tokenizer.tryConsume("[")) { while (true) { - consumeFieldValue(tokenizer, extensionRegistry, target, field, extension); + consumeFieldValue(tokenizer, extensionRegistry, target, field, extension, + parseTreeBuilder); if (tokenizer.tryConsume("]")) { // End of list. break; @@ -1487,13 +1557,8 @@ public final class TextFormat { tokenizer.consume(","); } } else { - consumeFieldValue(tokenizer, extensionRegistry, target, field, extension); - } - - // For historical reasons, fields may optionally be separated by commas or - // semicolons. - if (!tokenizer.tryConsume(";")) { - tokenizer.tryConsume(","); + consumeFieldValue( + tokenizer, extensionRegistry, target, field, extension, parseTreeBuilder); } } @@ -1506,7 +1571,8 @@ public final class TextFormat { final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target, final FieldDescriptor field, - final ExtensionRegistry.ExtensionInfo extension) + final ExtensionRegistry.ExtensionInfo extension, + final TextFormatParseInfoTree.Builder parseTreeBuilder) throws ParseException { Object value = null; @@ -1528,7 +1594,7 @@ public final class TextFormat { throw tokenizer.parseException( "Expected \"" + endToken + "\"."); } - mergeField(tokenizer, extensionRegistry, subField); + mergeField(tokenizer, extensionRegistry, subField, parseTreeBuilder); } value = subField.finish(); @@ -1704,52 +1770,6 @@ public final class TextFormat { // Some of these methods are package-private because Descriptors.java uses // them. 
- private interface ByteSequence { - int size(); - byte byteAt(int offset); - } - - /** - * Escapes bytes in the format used in protocol buffer text format, which - * is the same as the format used for C string literals. All bytes - * that are not printable 7-bit ASCII characters are escaped, as well as - * backslash, single-quote, and double-quote characters. Characters for - * which no defined short-hand escape sequence is defined will be escaped - * using 3-digit octal sequences. - */ - public static String escapeBytes(final ByteSequence input) { - final StringBuilder builder = new StringBuilder(input.size()); - for (int i = 0; i < input.size(); i++) { - final byte b = input.byteAt(i); - switch (b) { - // Java does not recognize \a or \v, apparently. - case 0x07: builder.append("\\a"); break; - case '\b': builder.append("\\b"); break; - case '\f': builder.append("\\f"); break; - case '\n': builder.append("\\n"); break; - case '\r': builder.append("\\r"); break; - case '\t': builder.append("\\t"); break; - case 0x0b: builder.append("\\v"); break; - case '\\': builder.append("\\\\"); break; - case '\'': builder.append("\\\'"); break; - case '"' : builder.append("\\\""); break; - default: - // Only ASCII characters between 0x20 (space) and 0x7e (tilde) are - // printable. Other byte values must be escaped. - if (b >= 0x20 && b <= 0x7e) { - builder.append((char) b); - } else { - builder.append('\\'); - builder.append((char) ('0' + ((b >>> 6) & 3))); - builder.append((char) ('0' + ((b >>> 3) & 7))); - builder.append((char) ('0' + (b & 7))); - } - break; - } - } - return builder.toString(); - } - /** * Escapes bytes in the format used in protocol buffer text format, which * is the same as the format used for C string literals. All bytes @@ -1758,33 +1778,15 @@ public final class TextFormat { * which no defined short-hand escape sequence is defined will be escaped * using 3-digit octal sequences. 
*/ - public static String escapeBytes(final ByteString input) { - return escapeBytes(new ByteSequence() { - @Override - public int size() { - return input.size(); - } - @Override - public byte byteAt(int offset) { - return input.byteAt(offset); - } - }); + public static String escapeBytes(ByteString input) { + return TextFormatEscaper.escapeBytes(input); } /** * Like {@link #escapeBytes(ByteString)}, but used for byte array. */ - public static String escapeBytes(final byte[] input) { - return escapeBytes(new ByteSequence() { - @Override - public int size() { - return input.length; - } - @Override - public byte byteAt(int offset) { - return input[offset]; - } - }); + public static String escapeBytes(byte[] input) { + return TextFormatEscaper.escapeBytes(input); } /** @@ -1868,7 +1870,9 @@ public final class TextFormat { } } - return ByteString.copyFrom(result, 0, pos); + return result.length == pos + ? ByteString.wrap(result) // This reference has not been out of our control. + : ByteString.copyFrom(result, 0, pos); } /** @@ -1896,7 +1900,7 @@ public final class TextFormat { * Escape double quotes and backslashes in a String for unicode output of a message. */ public static String escapeDoubleQuotesAndBackslashes(final String input) { - return input.replace("\\", "\\\\").replace("\"", "\\\""); + return TextFormatEscaper.escapeDoubleQuotesAndBackslashes(input); } /** diff --git a/java/core/src/main/java/com/google/protobuf/TextFormatEscaper.java b/java/core/src/main/java/com/google/protobuf/TextFormatEscaper.java index e69de29b..da9ceadd 100644 --- a/java/core/src/main/java/com/google/protobuf/TextFormatEscaper.java +++ b/java/core/src/main/java/com/google/protobuf/TextFormatEscaper.java @@ -0,0 +1,137 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. 
+// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package com.google.protobuf; + +/** + * Provide text format escaping support for proto2 instances. + */ +final class TextFormatEscaper { + private TextFormatEscaper() {} + + private interface ByteSequence { + int size(); + byte byteAt(int offset); + } + + /** + * Escapes bytes in the format used in protocol buffer text format, which + * is the same as the format used for C string literals. 
All bytes + * that are not printable 7-bit ASCII characters are escaped, as well as + * backslash, single-quote, and double-quote characters. Characters for + * which no defined short-hand escape sequence is defined will be escaped + * using 3-digit octal sequences. + */ + static String escapeBytes(final ByteSequence input) { + final StringBuilder builder = new StringBuilder(input.size()); + for (int i = 0; i < input.size(); i++) { + final byte b = input.byteAt(i); + switch (b) { + // Java does not recognize \a or \v, apparently. + case 0x07: builder.append("\\a"); break; + case '\b': builder.append("\\b"); break; + case '\f': builder.append("\\f"); break; + case '\n': builder.append("\\n"); break; + case '\r': builder.append("\\r"); break; + case '\t': builder.append("\\t"); break; + case 0x0b: builder.append("\\v"); break; + case '\\': builder.append("\\\\"); break; + case '\'': builder.append("\\\'"); break; + case '"' : builder.append("\\\""); break; + default: + // Only ASCII characters between 0x20 (space) and 0x7e (tilde) are + // printable. Other byte values must be escaped. + if (b >= 0x20 && b <= 0x7e) { + builder.append((char) b); + } else { + builder.append('\\'); + builder.append((char) ('0' + ((b >>> 6) & 3))); + builder.append((char) ('0' + ((b >>> 3) & 7))); + builder.append((char) ('0' + (b & 7))); + } + break; + } + } + return builder.toString(); + } + + /** + * Escapes bytes in the format used in protocol buffer text format, which + * is the same as the format used for C string literals. All bytes + * that are not printable 7-bit ASCII characters are escaped, as well as + * backslash, single-quote, and double-quote characters. Characters for + * which no defined short-hand escape sequence is defined will be escaped + * using 3-digit octal sequences. 
+ */ + static String escapeBytes(final ByteString input) { + return escapeBytes(new ByteSequence() { + @Override + public int size() { + return input.size(); + } + @Override + public byte byteAt(int offset) { + return input.byteAt(offset); + } + }); + } + + /** + * Like {@link #escapeBytes(ByteString)}, but used for byte array. + */ + static String escapeBytes(final byte[] input) { + return escapeBytes(new ByteSequence() { + @Override + public int size() { + return input.length; + } + @Override + public byte byteAt(int offset) { + return input[offset]; + } + }); + } + + /** + * Like {@link #escapeBytes(ByteString)}, but escapes a text string. + * Non-ASCII characters are first encoded as UTF-8, then each byte is escaped + * individually as a 3-digit octal escape. Yes, it's weird. + */ + static String escapeText(final String input) { + return escapeBytes(ByteString.copyFromUtf8(input)); + } + + /** + * Escape double quotes and backslashes in a String for unicode output of a message. + */ + static String escapeDoubleQuotesAndBackslashes(final String input) { + return input.replace("\\", "\\\\").replace("\"", "\\\""); + } +} diff --git a/java/core/src/main/java/com/google/protobuf/TextFormatParseInfoTree.java b/java/core/src/main/java/com/google/protobuf/TextFormatParseInfoTree.java new file mode 100644 index 00000000..2ecf912e --- /dev/null +++ b/java/core/src/main/java/com/google/protobuf/TextFormatParseInfoTree.java @@ -0,0 +1,225 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package com.google.protobuf; + +import com.google.protobuf.Descriptors.FieldDescriptor; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + + +/** + * Data structure which is populated with the locations of each field value parsed from the text. + * + * <p>The locations of primary field values are retrieved by {@code getLocation} or + * {@code getLocations}. The locations of sub message values are within nested + * {@code TextFormatParseInfoTree}s and are retrieved by {@code getNestedTree} or {@code getNestedTrees}. + * + * <p>The {@code TextFormatParseInfoTree} is created by a Builder. 
+ */ +public class TextFormatParseInfoTree { + + // Defines a mapping from each field's descriptor to the list of locations where + // its value(s) were encountered. + private Map<FieldDescriptor, List<TextFormatParseLocation>> locationsFromField; + + // Defines a mapping from a field's descriptor to a list of TextFormatParseInfoTrees for + // sub message location information. + Map<FieldDescriptor, List<TextFormatParseInfoTree>> subtreesFromField; + + /** + * Construct a {@code TextFormatParseInfoTree}. + * + * @param locationsFromField a map of fields to location in the source code + * @param subtreeBuildersFromField a map of fields to parse tree location information builders + */ + private TextFormatParseInfoTree( + Map<FieldDescriptor, List<TextFormatParseLocation>> locationsFromField, + Map<FieldDescriptor, List<TextFormatParseInfoTree.Builder>> subtreeBuildersFromField) { + + // The maps are unmodifiable. The values in the maps are unmodifiable. + Map<FieldDescriptor, List<TextFormatParseLocation>> locs = + new HashMap<FieldDescriptor, List<TextFormatParseLocation>>(); + for (Entry<FieldDescriptor, List<TextFormatParseLocation>> kv : locationsFromField.entrySet()) { + locs.put(kv.getKey(), Collections.unmodifiableList(kv.getValue())); + } + this.locationsFromField = Collections.unmodifiableMap(locs); + + Map<FieldDescriptor, List<TextFormatParseInfoTree>> subs = + new HashMap<FieldDescriptor, List<TextFormatParseInfoTree>>(); + for (Entry<FieldDescriptor, List<Builder>> kv : subtreeBuildersFromField.entrySet()) { + List<TextFormatParseInfoTree> submessagesOfField = new ArrayList<TextFormatParseInfoTree>(); + for (Builder subBuilder : kv.getValue()) { + submessagesOfField.add(subBuilder.build()); + } + subs.put(kv.getKey(), Collections.unmodifiableList(submessagesOfField)); + } + this.subtreesFromField = Collections.unmodifiableMap(subs); + } + + /** + * Retrieve all the locations of a field. 
+ * + * @param fieldDescriptor the {@link FieldDescriptor} of the desired field + * @return a list of the locations of values of the field. If there are no values + * or the field doesn't exist, an empty list is returned. + */ + public List<TextFormatParseLocation> getLocations(final FieldDescriptor fieldDescriptor) { + List<TextFormatParseLocation> result = locationsFromField.get(fieldDescriptor); + return (result == null) ? Collections.<TextFormatParseLocation>emptyList() : result; + } + + /** + * Get the location in the source of a field's value. + * + * <p>Returns the {@link TextFormatParseLocation} for the index-th value of the field in the parsed + * text. + * + * @param fieldDescriptor the {@link FieldDescriptor} of the desired field + * @param index the index of the value. + * @return the {@link TextFormatParseLocation} of the value + * @throws IllegalArgumentException if index is out of range + */ + public TextFormatParseLocation getLocation(final FieldDescriptor fieldDescriptor, int index) { + return getFromList(getLocations(fieldDescriptor), index, fieldDescriptor); + } + + /** + * Retrieve a list of all the location information trees for a sub message field. + * + * @param fieldDescriptor the {@link FieldDescriptor} of the desired field + * @return A list of {@link TextFormatParseInfoTree} + */ + public List<TextFormatParseInfoTree> getNestedTrees(final FieldDescriptor fieldDescriptor) { + List<TextFormatParseInfoTree> result = subtreesFromField.get(fieldDescriptor); + return result == null ? Collections.<TextFormatParseInfoTree>emptyList() : result; + } + + /** + * Returns the parse info tree for the given field, which must be a message type. + * + * @param fieldDescriptor the {@link FieldDescriptor} of the desired sub message + * @param index the index of message value. + * @return the {@code ParseInfoTree} of the message value; never {@code null}. If the field + * doesn't exist or the index is out of range, an exception is thrown instead. 
+ * @throws IllegalArgumentException if index is out of range + */ + public TextFormatParseInfoTree getNestedTree(final FieldDescriptor fieldDescriptor, int index) { + return getFromList(getNestedTrees(fieldDescriptor), index, fieldDescriptor); + } + + /** + * Create a builder for a {@code ParseInfoTree}. + * + * @return the builder + */ + public static Builder builder() { + return new Builder(); + } + + private static <T> T getFromList(List<T> list, int index, FieldDescriptor fieldDescriptor) { + if (index >= list.size() || index < 0) { + throw new IllegalArgumentException(String.format("Illegal index field: %s, index %d", + fieldDescriptor == null ? "<null>" : fieldDescriptor.getName(), index)); + } + return list.get(index); + } + + /** + * Builder for a {@link TextFormatParseInfoTree}. + */ + public static class Builder { + + private Map<FieldDescriptor, List<TextFormatParseLocation>> locationsFromField; + + // Defines a mapping from a field's descriptor to a list of ParseInfoTree builders for + // sub message location information. + private Map<FieldDescriptor, List<Builder>> subtreeBuildersFromField; + + /** + * Create a root level {@code ParseInfoTree} builder. + */ + private Builder() { + locationsFromField = new HashMap<FieldDescriptor, List<TextFormatParseLocation>>(); + subtreeBuildersFromField = new HashMap<FieldDescriptor, List<Builder>>(); + } + + /** + * Record the starting location of a single value for a field. + * + * @param fieldDescriptor the field + * @param location source code location information + */ + public Builder setLocation( + final FieldDescriptor fieldDescriptor, TextFormatParseLocation location) { + List<TextFormatParseLocation> fieldLocations = locationsFromField.get(fieldDescriptor); + if (fieldLocations == null) { + fieldLocations = new ArrayList<TextFormatParseLocation>(); + locationsFromField.put(fieldDescriptor, fieldLocations); + } + fieldLocations.add(location); + return this; + } + + /** + * Create a nested builder for a sub message. 
+ * + * <p>A new builder is created for a sub message. The builder that is returned is a new builder. + * It is <em>not</em> the builder on which {@code getBuilderForSubMessageField} was invoked. + * + * @param fieldDescriptor the field whose value is the submessage + * @return a new Builder for the sub message + */ + public Builder getBuilderForSubMessageField(final FieldDescriptor fieldDescriptor) { + List<Builder> submessageBuilders = subtreeBuildersFromField.get(fieldDescriptor); + if (submessageBuilders == null) { + submessageBuilders = new ArrayList<Builder>(); + subtreeBuildersFromField.put(fieldDescriptor, submessageBuilders); + } + Builder subtreeBuilder = new Builder(); + submessageBuilders.add(subtreeBuilder); + return subtreeBuilder; + } + + /** + * Build the {@code TextFormatParseInfoTree}. + * + * @return the {@code TextFormatParseInfoTree} + */ + public TextFormatParseInfoTree build() { + return new TextFormatParseInfoTree(locationsFromField, subtreeBuildersFromField); + } + } +} diff --git a/java/core/src/main/java/com/google/protobuf/TextFormatParseLocation.java b/java/core/src/main/java/com/google/protobuf/TextFormatParseLocation.java new file mode 100644 index 00000000..cce286e1 --- /dev/null +++ b/java/core/src/main/java/com/google/protobuf/TextFormatParseLocation.java @@ -0,0 +1,104 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package com.google.protobuf; + +import java.util.Arrays; + +/** + * A location in the source code. + * + * <p>A location is the starting line number and starting column number. + */ +public final class TextFormatParseLocation { + + /** + * The empty location. + */ + public static final TextFormatParseLocation EMPTY = new TextFormatParseLocation(-1, -1); + + /** + * Create a location. 
+ * + * @param line the starting line number + * @param column the starting column number + * @return a {@code ParseLocation} + */ + static TextFormatParseLocation create(int line, int column) { + if (line == -1 && column == -1) { + return EMPTY; + } + if (line < 0 || column < 0) { + throw new IllegalArgumentException( + String.format("line and column values must be >= 0: line %d, column: %d", line, column)); + } + return new TextFormatParseLocation(line, column); + } + + private final int line; + private final int column; + + private TextFormatParseLocation(int line, int column) { + this.line = line; + this.column = column; + } + + public int getLine() { + return line; + } + + public int getColumn() { + return column; + } + + @Override + public String toString() { + return String.format("ParseLocation{line=%d, column=%d}", line, column); + } + + @Override + public boolean equals(Object o) { + if (o == this) { + return true; + } + if (!(o instanceof TextFormatParseLocation)) { + return false; + } + TextFormatParseLocation that = (TextFormatParseLocation) o; + return (this.line == that.getLine()) + && (this.column == that.getColumn()); + } + + @Override + public int hashCode() { + int[] values = {line, column}; + return Arrays.hashCode(values); + } +} diff --git a/java/core/src/main/java/com/google/protobuf/UnknownFieldSetLite.java b/java/core/src/main/java/com/google/protobuf/UnknownFieldSetLite.java index 435ad4d4..9500f905 100644 --- a/java/core/src/main/java/com/google/protobuf/UnknownFieldSetLite.java +++ b/java/core/src/main/java/com/google/protobuf/UnknownFieldSetLite.java @@ -61,15 +61,6 @@ public final class UnknownFieldSetLite { public static UnknownFieldSetLite getDefaultInstance() { return DEFAULT_INSTANCE; } - - /** - * Returns an empty {@code UnknownFieldSetLite.Builder}. - * - * <p>For use by generated code only. - */ - public static Builder newBuilder() { - return new Builder(); - } /** * Returns a new mutable instance. 
@@ -262,6 +253,21 @@ public final class UnknownFieldSetLite { return hashCode; } + /** + * Prints a String representation of the unknown field set. + * + * <p>For use by generated code only. + * + * @param buffer the buffer to write to + * @param indent the number of spaces the fields should be indented by + */ + final void printWithIndent(StringBuilder buffer, int indent) { + for (int i = 0; i < count; i++) { + int fieldNumber = WireFormat.getTagFieldNumber(tags[i]); + MessageLiteToString.printField(buffer, indent, String.valueOf(fieldNumber), objects[i]); + } + } + private void storeField(int tag, Object value) { ensureCapacity(); @@ -369,90 +375,4 @@ public final class UnknownFieldSetLite { } return this; } - - /** - * Builder for {@link UnknownFieldSetLite}s. - * - * <p>Use {@link UnknownFieldSet#newBuilder()} to construct a {@code Builder}. - * - * <p>For use by generated code only. - */ - // TODO(dweis): Update the mutable API to no longer need this builder and delete. - public static final class Builder { - - private UnknownFieldSetLite set; - - private Builder() { - this.set = null; - } - - /** - * Ensures internal state is initialized for use. - */ - private void ensureNotBuilt() { - if (set == null) { - set = new UnknownFieldSetLite(); - } - - set.checkMutable(); - } - - /** - * Parse a single field from {@code input} and merge it into this set. - * - * <p>For use by generated code only. - * - * @param tag The field's tag number, which was already parsed. - * @return {@code false} if the tag is an end group tag. - */ - boolean mergeFieldFrom(final int tag, final CodedInputStream input) throws IOException { - ensureNotBuilt(); - return set.mergeFieldFrom(tag, input); - } - - /** - * Convenience method for merging a new field containing a single varint - * value. This is used in particular when an unknown enum value is - * encountered. - * - * <p>For use by generated code only. 
- */ - Builder mergeVarintField(int fieldNumber, int value) { - ensureNotBuilt(); - set.mergeVarintField(fieldNumber, value); - return this; - } - - /** - * Convenience method for merging a length-delimited field. - * - * <p>For use by generated code only. - */ - public Builder mergeLengthDelimitedField(final int fieldNumber, final ByteString value) { - ensureNotBuilt(); - set.mergeLengthDelimitedField(fieldNumber, value); - return this; - } - - /** - * Build the {@link UnknownFieldSetLite} and return it. - * - * <p>Once {@code build()} has been called, the {@code Builder} will no - * longer be usable. Calling any method after {@code build()} will result - * in undefined behavior and can cause an - * {@code UnsupportedOperationException} to be thrown. - * - * <p>For use by generated code only. - */ - public UnknownFieldSetLite build() { - if (set == null) { - return DEFAULT_INSTANCE; - } - - set.checkMutable(); - set.makeImmutable(); - - return set; - } - } } diff --git a/java/core/src/main/java/com/google/protobuf/UnsafeByteOperations.java b/java/core/src/main/java/com/google/protobuf/UnsafeByteOperations.java index f443ee39..0fbf4d40 100644 --- a/java/core/src/main/java/com/google/protobuf/UnsafeByteOperations.java +++ b/java/core/src/main/java/com/google/protobuf/UnsafeByteOperations.java @@ -30,6 +30,7 @@ package com.google.protobuf; +import java.io.IOException; import java.nio.ByteBuffer; /** @@ -49,8 +50,8 @@ public final class UnsafeByteOperations { /** * An unsafe operation that returns a {@link ByteString} that is backed by the provided buffer. * - * @param buffer the Java NIO buffer to be wrapped. - * @return a {@link ByteString} backed by the provided buffer. 
+ * @param buffer the Java NIO buffer to be wrapped + * @return a {@link ByteString} backed by the provided buffer */ public static ByteString unsafeWrap(ByteBuffer buffer) { if (buffer.hasArray()) { @@ -60,4 +61,24 @@ public final class UnsafeByteOperations { return new NioByteString(buffer); } } + + /** + * Writes the given {@link ByteString} to the provided {@link ByteOutput}. Calling this method may + * result in multiple operations on the target {@link ByteOutput} + * (i.e. for roped {@link ByteString}s). + * + * <p>This method exposes the internal backing buffer(s) of the {@link ByteString} to the {@link + * ByteOutput} in order to avoid additional copying overhead. It would be possible for a malicious + * {@link ByteOutput} to corrupt the {@link ByteString}. Use with caution! + * + * <p> NOTE: The {@link ByteOutput} <strong>MUST NOT</strong> modify the provided buffers. Doing + * so may result in corrupted data, which would be difficult to debug. + * + * @param bytes the {@link ByteString} to be written + * @param output the output to receive the bytes + * @throws IOException if an I/O error occurs + */ + public static void unsafeWriteTo(ByteString bytes, ByteOutput output) throws IOException { + bytes.writeTo(output); + } } diff --git a/java/core/src/main/java/com/google/protobuf/Utf8.java b/java/core/src/main/java/com/google/protobuf/Utf8.java index 48c7e9e6..308c69e9 100644 --- a/java/core/src/main/java/com/google/protobuf/Utf8.java +++ b/java/core/src/main/java/com/google/protobuf/Utf8.java @@ -30,6 +30,19 @@ package com.google.protobuf; +import static java.lang.Character.MAX_SURROGATE; +import static java.lang.Character.MIN_SURROGATE; +import static java.lang.Character.isSurrogatePair; +import static java.lang.Character.toCodePoint; + +import java.lang.reflect.Field; +import java.nio.Buffer; +import java.nio.ByteBuffer; +import java.security.AccessController; +import java.security.PrivilegedExceptionAction; +import java.util.logging.Level; +import 
java.util.logging.Logger; + /** * A set of low-level, high-performance static utility methods related * to the UTF-8 character encoding. This class has no dependencies @@ -64,9 +77,24 @@ package com.google.protobuf; * * @author martinrb@google.com (Martin Buchholz) */ +// TODO(nathanmittler): Copy changes in this class back to Guava final class Utf8 { - private Utf8() {} - + private static final Logger logger = Logger.getLogger(Utf8.class.getName()); + + /** + * UTF-8 is a runtime hot spot so we attempt to provide heavily optimized implementations + * depending on what is available on the platform. The processor is the platform-optimized + * delegate for which all methods are delegated directly to. + */ + private static final Processor processor = + UnsafeProcessor.isAvailable() ? new UnsafeProcessor() : new SafeProcessor(); + + /** + * A mask used when performing unsafe reads to determine if a long value contains any non-ASCII + * characters (i.e. any byte >= 0x80). + */ + private static final long ASCII_MASK_LONG = 0x8080808080808080L; + /** * Maximum number of bytes per Java UTF-16 char in UTF-8. * @see java.nio.charset.CharsetEncoder#maxBytesPerChar() @@ -85,6 +113,18 @@ final class Utf8 { */ public static final int MALFORMED = -1; + /** + * Used by {@code Unsafe} UTF-8 string validation logic to determine the minimum string length + * above which to employ an optimized algorithm for counting ASCII characters. The reason for this + * threshold is that for small strings, the optimization may not be beneficial or may even + * negatively impact performance since it requires additional logic to avoid unaligned reads + * (when calling {@code Unsafe.getLong}). This threshold guarantees that even if the initial + * offset is unaligned, we're guaranteed to make at least one call to {@code Unsafe.getLong()} + * which provides a performance improvement that entirely subsumes the cost of the additional + * logic. 
+ */ + private static final int UNSAFE_COUNT_ASCII_THRESHOLD = 16; + // Other state values include the partial bytes of the incomplete // character to be decoded in the simplest way: we pack the bytes // into the state int in little-endian order. For example: @@ -112,7 +152,7 @@ final class Utf8 { * isValidUtf8(bytes, 0, bytes.length)}. */ public static boolean isValidUtf8(byte[] bytes) { - return isValidUtf8(bytes, 0, bytes.length); + return processor.isValidUtf8(bytes, 0, bytes.length); } /** @@ -125,7 +165,7 @@ final class Utf8 { * partialIsValidUtf8(bytes, index, limit) == Utf8.COMPLETE}. */ public static boolean isValidUtf8(byte[] bytes, int index, int limit) { - return partialIsValidUtf8(bytes, index, limit) == COMPLETE; + return processor.isValidUtf8(bytes, index, limit); } /** @@ -146,183 +186,8 @@ final class Utf8 { * decode the character when passed to a subsequent invocation of a * partial decoding method. */ - public static int partialIsValidUtf8( - int state, byte[] bytes, int index, int limit) { - if (state != COMPLETE) { - // The previous decoding operation was incomplete (or malformed). - // We look for a well-formed sequence consisting of bytes from - // the previous decoding operation (stored in state) together - // with bytes from the array slice. - // - // We expect such "straddler characters" to be rare. - - if (index >= limit) { // No bytes? No progress. - return state; - } - int byte1 = (byte) state; - // byte1 is never ASCII. - if (byte1 < (byte) 0xE0) { - // two-byte form - - // Simultaneously checks for illegal trailing-byte in - // leading position and overlong 2-byte form. 
- if (byte1 < (byte) 0xC2 || - // byte2 trailing-byte test - bytes[index++] > (byte) 0xBF) { - return MALFORMED; - } - } else if (byte1 < (byte) 0xF0) { - // three-byte form - - // Get byte2 from saved state or array - int byte2 = (byte) ~(state >> 8); - if (byte2 == 0) { - byte2 = bytes[index++]; - if (index >= limit) { - return incompleteStateFor(byte1, byte2); - } - } - if (byte2 > (byte) 0xBF || - // overlong? 5 most significant bits must not all be zero - (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0) || - // illegal surrogate codepoint? - (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0) || - // byte3 trailing-byte test - bytes[index++] > (byte) 0xBF) { - return MALFORMED; - } - } else { - // four-byte form - - // Get byte2 and byte3 from saved state or array - int byte2 = (byte) ~(state >> 8); - int byte3 = 0; - if (byte2 == 0) { - byte2 = bytes[index++]; - if (index >= limit) { - return incompleteStateFor(byte1, byte2); - } - } else { - byte3 = (byte) (state >> 16); - } - if (byte3 == 0) { - byte3 = bytes[index++]; - if (index >= limit) { - return incompleteStateFor(byte1, byte2, byte3); - } - } - - // If we were called with state == MALFORMED, then byte1 is 0xFF, - // which never occurs in well-formed UTF-8, and so we will return - // MALFORMED again below. - - if (byte2 > (byte) 0xBF || - // Check that 1 <= plane <= 16. Tricky optimized form of: - // if (byte1 > (byte) 0xF4 || - // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 || - // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F) - (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0 || - // byte3 trailing-byte test - byte3 > (byte) 0xBF || - // byte4 trailing-byte test - bytes[index++] > (byte) 0xBF) { - return MALFORMED; - } - } - } - - return partialIsValidUtf8(bytes, index, limit); - } - - /** - * Tells whether the given byte array slice is a well-formed, - * malformed, or incomplete UTF-8 byte sequence. 
The range of bytes - * to be checked extends from index {@code index}, inclusive, to - * {@code limit}, exclusive. - * - * <p>This is a convenience method, equivalent to a call to {@code - * partialIsValidUtf8(Utf8.COMPLETE, bytes, index, limit)}. - * - * @return {@link #MALFORMED} if the partial byte sequence is - * definitely not well-formed, {@link #COMPLETE} if it is well-formed - * (no additional input needed), or if the byte sequence is - * "incomplete", i.e. apparently terminated in the middle of a character, - * an opaque integer "state" value containing enough information to - * decode the character when passed to a subsequent invocation of a - * partial decoding method. - */ - public static int partialIsValidUtf8( - byte[] bytes, int index, int limit) { - // Optimize for 100% ASCII. - // Hotspot loves small simple top-level loops like this. - while (index < limit && bytes[index] >= 0) { - index++; - } - - return (index >= limit) ? COMPLETE : - partialIsValidUtf8NonAscii(bytes, index, limit); - } - - private static int partialIsValidUtf8NonAscii( - byte[] bytes, int index, int limit) { - for (;;) { - int byte1, byte2; - - // Optimize for interior runs of ASCII bytes. - do { - if (index >= limit) { - return COMPLETE; - } - } while ((byte1 = bytes[index++]) >= 0); - - if (byte1 < (byte) 0xE0) { - // two-byte form - - if (index >= limit) { - return byte1; - } - - // Simultaneously checks for illegal trailing-byte in - // leading position and overlong 2-byte form. - if (byte1 < (byte) 0xC2 || - bytes[index++] > (byte) 0xBF) { - return MALFORMED; - } - } else if (byte1 < (byte) 0xF0) { - // three-byte form - - if (index >= limit - 1) { // incomplete sequence - return incompleteStateFor(bytes, index, limit); - } - if ((byte2 = bytes[index++]) > (byte) 0xBF || - // overlong? 
5 most significant bits must not all be zero - (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0) || - // check for illegal surrogate codepoints - (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0) || - // byte3 trailing-byte test - bytes[index++] > (byte) 0xBF) { - return MALFORMED; - } - } else { - // four-byte form - - if (index >= limit - 2) { // incomplete sequence - return incompleteStateFor(bytes, index, limit); - } - if ((byte2 = bytes[index++]) > (byte) 0xBF || - // Check that 1 <= plane <= 16. Tricky optimized form of: - // if (byte1 > (byte) 0xF4 || - // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 || - // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F) - (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0 || - // byte3 trailing-byte test - bytes[index++] > (byte) 0xBF || - // byte4 trailing-byte test - bytes[index++] > (byte) 0xBF) { - return MALFORMED; - } - } - } + public static int partialIsValidUtf8(int state, byte[] bytes, int index, int limit) { + return processor.partialIsValidUtf8(state, bytes, index, limit); } private static int incompleteStateFor(int byte1) { @@ -352,19 +217,31 @@ final class Utf8 { default: throw new AssertionError(); } } - + + private static int incompleteStateFor( + final ByteBuffer buffer, final int byte1, final int index, final int remaining) { + switch (remaining) { + case 0: + return incompleteStateFor(byte1); + case 1: + return incompleteStateFor(byte1, buffer.get(index)); + case 2: + return incompleteStateFor(byte1, buffer.get(index), buffer.get(index + 1)); + default: + throw new AssertionError(); + } + } // These UTF-8 handling methods are copied from Guava's Utf8 class with a modification to throw // a protocol buffer local exception. This exception is then caught in CodedOutputStream so it can // fallback to more lenient behavior. 
static class UnpairedSurrogateException extends IllegalArgumentException { - private UnpairedSurrogateException(int index, int length) { super("Unpaired surrogate at index " + index + " of " + length); } } - + /** * Returns the number of bytes in the UTF-8-encoded form of {@code sequence}. For a string, * this method is equivalent to {@code string.getBytes(UTF_8).length}, but is more efficient in @@ -426,56 +303,1381 @@ final class Utf8 { return utf8Length; } - static int encode(CharSequence sequence, byte[] bytes, int offset, int length) { - int utf16Length = sequence.length(); - int j = offset; - int i = 0; - int limit = offset + length; - // Designed to take advantage of - // https://wikis.oracle.com/display/HotSpotInternals/RangeCheckElimination - for (char c; i < utf16Length && i + j < limit && (c = sequence.charAt(i)) < 0x80; i++) { - bytes[j + i] = (byte) c; - } - if (i == utf16Length) { - return j + utf16Length; - } - j += i; - for (char c; i < utf16Length; i++) { - c = sequence.charAt(i); - if (c < 0x80 && j < limit) { - bytes[j++] = (byte) c; - } else if (c < 0x800 && j <= limit - 2) { // 11 bits, two UTF-8 bytes - bytes[j++] = (byte) ((0xF << 6) | (c >>> 6)); - bytes[j++] = (byte) (0x80 | (0x3F & c)); - } else if ((c < Character.MIN_SURROGATE || Character.MAX_SURROGATE < c) && j <= limit - 3) { - // Maximum single-char code point is 0xFFFF, 16 bits, three UTF-8 bytes - bytes[j++] = (byte) ((0xF << 5) | (c >>> 12)); - bytes[j++] = (byte) (0x80 | (0x3F & (c >>> 6))); - bytes[j++] = (byte) (0x80 | (0x3F & c)); - } else if (j <= limit - 4) { - // Minimum code point represented by a surrogate pair is 0x10000, 17 bits, four UTF-8 bytes - final char low; - if (i + 1 == sequence.length() - || !Character.isSurrogatePair(c, (low = sequence.charAt(++i)))) { - throw new UnpairedSurrogateException((i - 1), utf16Length); - } - int codePoint = Character.toCodePoint(c, low); - bytes[j++] = (byte) ((0xF << 4) | (codePoint >>> 18)); - bytes[j++] = (byte) (0x80 | (0x3F & 
(codePoint >>> 12))); - bytes[j++] = (byte) (0x80 | (0x3F & (codePoint >>> 6))); - bytes[j++] = (byte) (0x80 | (0x3F & codePoint)); + static int encode(CharSequence in, byte[] out, int offset, int length) { + return processor.encodeUtf8(in, out, offset, length); + } + // End Guava UTF-8 methods. + + /** + * Determines if the given {@link ByteBuffer} is a valid UTF-8 string. + * + * <p>Selects an optimal algorithm based on the type of {@link ByteBuffer} (i.e. heap or direct) + * and the capabilities of the platform. + * + * @param buffer the buffer to check. + * @see Utf8#isValidUtf8(byte[], int, int) + */ + static boolean isValidUtf8(ByteBuffer buffer) { + return processor.isValidUtf8(buffer, buffer.position(), buffer.remaining()); + } + + /** + * Determines if the given {@link ByteBuffer} is a partially valid UTF-8 string. + * + * <p>Selects an optimal algorithm based on the type of {@link ByteBuffer} (i.e. heap or direct) + * and the capabilities of the platform. + * + * @param buffer the buffer to check. + * @see Utf8#partialIsValidUtf8(int, byte[], int, int) + */ + static int partialIsValidUtf8(int state, ByteBuffer buffer, int index, int limit) { + return processor.partialIsValidUtf8(state, buffer, index, limit); + } + + /** + * Encodes the given characters to the target {@link ByteBuffer} using UTF-8 encoding. + * + * <p>Selects an optimal algorithm based on the type of {@link ByteBuffer} (i.e. heap or direct) + * and the capabilities of the platform. + * + * @param in the source string to be encoded + * @param out the target buffer to receive the encoded string. + * @see Utf8#encode(CharSequence, byte[], int, int) + */ + static void encodeUtf8(CharSequence in, ByteBuffer out) { + processor.encodeUtf8(in, out); + } + + /** + * Counts (approximately) the number of consecutive ASCII characters in the given buffer. + * The byte order of the {@link ByteBuffer} does not matter, so performance can be improved if + * native byte order is used (i.e. 
no byte-swapping in {@link ByteBuffer#getLong(int)}). + * + * @param buffer the buffer to be scanned for ASCII chars + * @param index the starting index of the scan + * @param limit the limit within buffer for the scan + * @return the number of ASCII characters found. The stopping position will be at or + * before the first non-ASCII byte. + */ + private static int estimateConsecutiveAscii(ByteBuffer buffer, int index, int limit) { + int i = index; + final int lim = limit - 7; + // This simple loop stops when we encounter a byte >= 0x80 (i.e. non-ASCII). + // To speed things up further, we're reading longs instead of bytes so we use a mask to + // determine if any byte in the current long is non-ASCII. + for (; i < lim && (buffer.getLong(i) & ASCII_MASK_LONG) == 0; i += 8) {} + return i - index; + } + + /** + * A processor of UTF-8 strings, providing methods for checking validity and encoding. + */ + // TODO(nathanmittler): Add support for Memory/MemoryBlock on Android. + abstract static class Processor { + /** + * Returns {@code true} if the given byte array slice is a + * well-formed UTF-8 byte sequence. The range of bytes to be + * checked extends from index {@code index}, inclusive, to {@code + * limit}, exclusive. + * + * <p>This is a convenience method, equivalent to {@code + * partialIsValidUtf8(bytes, index, limit) == Utf8.COMPLETE}. + */ + final boolean isValidUtf8(byte[] bytes, int index, int limit) { + return partialIsValidUtf8(COMPLETE, bytes, index, limit) == COMPLETE; + } + + /** + * Tells whether the given byte array slice is a well-formed, + * malformed, or incomplete UTF-8 byte sequence. The range of bytes + * to be checked extends from index {@code index}, inclusive, to + * {@code limit}, exclusive. 
+ * + * @param state either {@link Utf8#COMPLETE} (if this is the initial decoding + * operation) or the value returned from a call to a partial decoding method + * for the previous bytes + * + * @return {@link #MALFORMED} if the partial byte sequence is + * definitely not well-formed, {@link #COMPLETE} if it is well-formed + * (no additional input needed), or if the byte sequence is + * "incomplete", i.e. apparently terminated in the middle of a character, + * an opaque integer "state" value containing enough information to + * decode the character when passed to a subsequent invocation of a + * partial decoding method. + */ + abstract int partialIsValidUtf8(int state, byte[] bytes, int index, int limit); + + /** + * Returns {@code true} if the given portion of the {@link ByteBuffer} is a + * well-formed UTF-8 byte sequence. The range of bytes to be + * checked extends from index {@code index}, inclusive, to {@code + * limit}, exclusive. + * + * <p>This is a convenience method, equivalent to {@code + * partialIsValidUtf8(bytes, index, limit) == Utf8.COMPLETE}. + */ + final boolean isValidUtf8(ByteBuffer buffer, int index, int limit) { + return partialIsValidUtf8(COMPLETE, buffer, index, limit) == COMPLETE; + } + + /** + * Tells whether the given portion of the {@link ByteBuffer} is a well-formed, malformed, or + * incomplete UTF-8 byte sequence. + * + * @param buffer the buffer to check. + * @return {@link Utf8#MALFORMED}, {@link Utf8#COMPLETE}, or an opaque incomplete "state" value, + * as described for {@link #partialIsValidUtf8(int, byte[], int, int)}. + */ + final int partialIsValidUtf8( + final int state, final ByteBuffer buffer, int index, final int limit) { + if (buffer.hasArray()) { + final int offset = buffer.arrayOffset(); + return partialIsValidUtf8(state, buffer.array(), offset + index, offset + limit); + } else if (buffer.isDirect()){ + return partialIsValidUtf8Direct(state, buffer, index, limit); + } + return partialIsValidUtf8Default(state, buffer, index, limit); + } + + /** + * Performs validation for direct {@link ByteBuffer} instances.
+ */ + abstract int partialIsValidUtf8Direct( + final int state, final ByteBuffer buffer, int index, final int limit); + + /** + * Performs validation for {@link ByteBuffer} instances using the {@link ByteBuffer} API rather + * than potentially faster approaches. This first completes validation for the current + * character (provided by {@code state}) and then finishes validation for the sequence. + */ + final int partialIsValidUtf8Default( + final int state, final ByteBuffer buffer, int index, final int limit) { + if (state != COMPLETE) { + // The previous decoding operation was incomplete (or malformed). + // We look for a well-formed sequence consisting of bytes from + // the previous decoding operation (stored in state) together + // with bytes from the array slice. + // + // We expect such "straddler characters" to be rare. + + if (index >= limit) { // No bytes? No progress. + return state; + } + + byte byte1 = (byte) state; + // byte1 is never ASCII. + if (byte1 < (byte) 0xE0) { + // two-byte form + + // Simultaneously checks for illegal trailing-byte in + // leading position and overlong 2-byte form. + if (byte1 < (byte) 0xC2 + // byte2 trailing-byte test + || buffer.get(index++) > (byte) 0xBF) { + return MALFORMED; + } + } else if (byte1 < (byte) 0xF0) { + // three-byte form + + // Get byte2 from saved state or array + byte byte2 = (byte) ~(state >> 8); + if (byte2 == 0) { + byte2 = buffer.get(index++); + if (index >= limit) { + return incompleteStateFor(byte1, byte2); + } + } + if (byte2 > (byte) 0xBF + // overlong? 5 most significant bits must not all be zero + || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0) + // illegal surrogate codepoint? 
+ || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0) + // byte3 trailing-byte test + || buffer.get(index++) > (byte) 0xBF) { + return MALFORMED; + } + } else { + // four-byte form + + // Get byte2 and byte3 from saved state or array + byte byte2 = (byte) ~(state >> 8); + byte byte3 = 0; + if (byte2 == 0) { + byte2 = buffer.get(index++); + if (index >= limit) { + return incompleteStateFor(byte1, byte2); + } + } else { + byte3 = (byte) (state >> 16); + } + if (byte3 == 0) { + byte3 = buffer.get(index++); + if (index >= limit) { + return incompleteStateFor(byte1, byte2, byte3); + } + } + + // If we were called with state == MALFORMED, then byte1 is 0xFF, + // which never occurs in well-formed UTF-8, and so we will return + // MALFORMED again below. + + if (byte2 > (byte) 0xBF + // Check that 1 <= plane <= 16. Tricky optimized form of: + // if (byte1 > (byte) 0xF4 || + // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 || + // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F) + || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0 + // byte3 trailing-byte test + || byte3 > (byte) 0xBF + // byte4 trailing-byte test + || buffer.get(index++) > (byte) 0xBF) { + return MALFORMED; + } + } + } + + // Finish validation for the sequence. + return partialIsValidUtf8(buffer, index, limit); + } + + /** + * Performs validation for {@link ByteBuffer} instances using the {@link ByteBuffer} API rather + * than potentially faster approaches. + */ + private static int partialIsValidUtf8(final ByteBuffer buffer, int index, final int limit) { + index += estimateConsecutiveAscii(buffer, index, limit); + + for (;;) { + // Optimize for interior runs of ASCII bytes. + // TODO(nathanmittler): Consider checking 8 bytes at a time after some threshold? + // Maybe after seeing a few in a row that are ASCII, go back to fast mode? + int byte1; + do { + if (index >= limit) { + return COMPLETE; + } + } while ((byte1 = buffer.get(index++)) >= 0); + + // If we're here byte1 is not ASCII. 
Only need to handle 2-4 byte forms. + if (byte1 < (byte) 0xE0) { + // Two-byte form (110xxxxx 10xxxxxx) + if (index >= limit) { + // Incomplete sequence + return byte1; + } + + // Simultaneously checks for illegal trailing-byte in + // leading position and overlong 2-byte form. + if (byte1 < (byte) 0xC2 || buffer.get(index) > (byte) 0xBF) { + return MALFORMED; + } + index++; + } else if (byte1 < (byte) 0xF0) { + // Three-byte form (1110xxxx 10xxxxxx 10xxxxxx) + if (index >= limit - 1) { + // Incomplete sequence + return incompleteStateFor(buffer, byte1, index, limit - index); + } + + final byte byte2 = buffer.get(index++); + if (byte2 > (byte) 0xBF + // overlong? 5 most significant bits must not all be zero + || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0) + // check for illegal surrogate codepoints + || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0) + // byte3 trailing-byte test + || buffer.get(index) > (byte) 0xBF) { + return MALFORMED; + } + index++; + } else { + // Four-byte form (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx) + if (index >= limit - 2) { + // Incomplete sequence + return incompleteStateFor(buffer, byte1, index, limit - index); + } + + // TODO(nathanmittler): Consider using getInt() to improve performance. + final int byte2 = buffer.get(index++); + if (byte2 > (byte) 0xBF + // Check that 1 <= plane <= 16. Tricky optimized form of: + // if (byte1 > (byte) 0xF4 || + // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 || + // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F) + || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0 + // byte3 trailing-byte test + || buffer.get(index++) > (byte) 0xBF + // byte4 trailing-byte test + || buffer.get(index++) > (byte) 0xBF) { + return MALFORMED; + } + } + } + } + + /** + * Encodes an input character sequence ({@code in}) to UTF-8 in the target array ({@code out}).
+ * For a string, this method is similar to + * <pre>{@code + * byte[] a = string.getBytes(UTF_8); + * System.arraycopy(a, 0, bytes, offset, a.length); + * return offset + a.length; + * }</pre> + * + * but is more efficient in both time and space. One key difference is that this method + * requires paired surrogates, and therefore does not support chunking. + * While {@code String.getBytes(UTF_8)} replaces unpaired surrogates with the default + * replacement character, this method throws {@link UnpairedSurrogateException}. + * + * <p>To ensure sufficient space in the output buffer, either call {@link #encodedLength} to + * compute the exact amount needed, or leave room for + * {@code Utf8.MAX_BYTES_PER_CHAR * sequence.length()}, which is the largest possible number + * of bytes that any input can be encoded to. + * + * @param in the input character sequence to be encoded + * @param out the target array + * @param offset the starting offset in {@code bytes} to start writing at + * @param length the length of the {@code bytes}, starting from {@code offset} + * @throws UnpairedSurrogateException if {@code sequence} contains ill-formed UTF-16 (unpaired + * surrogates) + * @throws ArrayIndexOutOfBoundsException if {@code sequence} encoded in UTF-8 is longer than + * {@code bytes.length - offset} + * @return the new offset, equivalent to {@code offset + Utf8.encodedLength(sequence)} + */ + abstract int encodeUtf8(CharSequence in, byte[] out, int offset, int length); + + /** + * Encodes an input character sequence ({@code in}) to UTF-8 in the target buffer ({@code out}). + * Upon returning from this method, the {@code out} position will point to the position after + * the last encoded byte. This method requires paired surrogates, and therefore does not + * support chunking. 
+ * + * <p>To ensure sufficient space in the output buffer, either call {@link #encodedLength} to + * compute the exact amount needed, or leave room for + * {@code Utf8.MAX_BYTES_PER_CHAR * in.length()}, which is the largest possible number + * of bytes that any input can be encoded to. + * + * @param in the source character sequence to be encoded + * @param out the target buffer + * @throws UnpairedSurrogateException if {@code in} contains ill-formed UTF-16 (unpaired + * surrogates) + * @throws ArrayIndexOutOfBoundsException if {@code in} encoded in UTF-8 is longer than + * {@code out.remaining()} + */ + final void encodeUtf8(CharSequence in, ByteBuffer out) { + if (out.hasArray()) { + final int offset = out.arrayOffset(); + int endIndex = + Utf8.encode(in, out.array(), offset + out.position(), out.remaining()); + out.position(endIndex - offset); + } else if (out.isDirect()) { + encodeUtf8Direct(in, out); } else { - // If we are surrogates and we're not a surrogate pair, always throw an - // IllegalArgumentException instead of an ArrayOutOfBoundsException. - if ((Character.MIN_SURROGATE <= c && c <= Character.MAX_SURROGATE) - && (i + 1 == sequence.length() - || !Character.isSurrogatePair(c, sequence.charAt(i + 1)))) { - throw new UnpairedSurrogateException(i, utf16Length); + encodeUtf8Default(in, out); + } + } + + /** + * Encodes the input character sequence to a direct {@link ByteBuffer} instance. + */ + abstract void encodeUtf8Direct(CharSequence in, ByteBuffer out); + + /** + * Encodes the input character sequence to a {@link ByteBuffer} instance using the {@link + * ByteBuffer} API, rather than potentially faster approaches. + */ + final void encodeUtf8Default(CharSequence in, ByteBuffer out) { + final int inLength = in.length(); + int outIx = out.position(); + int inIx = 0; + + // Since ByteBuffer.putXXX() already checks boundaries for us, no need to explicitly check + // access. 
Assume the buffer is big enough and let it handle the out of bounds exception + // if it occurs. + try { + // Designed to take advantage of + // https://wikis.oracle.com/display/HotSpotInternals/RangeCheckElimination + for (char c; inIx < inLength && (c = in.charAt(inIx)) < 0x80; ++inIx) { + out.put(outIx + inIx, (byte) c); } - throw new ArrayIndexOutOfBoundsException("Failed writing " + c + " at index " + j); + if (inIx == inLength) { + // Successfully encoded the entire string. + out.position(outIx + inIx); + return; + } + + outIx += inIx; + for (char c; inIx < inLength; ++inIx, ++outIx) { + c = in.charAt(inIx); + if (c < 0x80) { + // One byte (0xxx xxxx) + out.put(outIx, (byte) c); + } else if (c < 0x800) { + // Two bytes (110x xxxx 10xx xxxx) + + // Benchmarks show put performs better than putShort here (for HotSpot). + out.put(outIx++, (byte) (0xC0 | (c >>> 6))); + out.put(outIx, (byte) (0x80 | (0x3F & c))); + } else if (c < MIN_SURROGATE || MAX_SURROGATE < c) { + // Three bytes (1110 xxxx 10xx xxxx 10xx xxxx) + // Maximum single-char code point is 0xFFFF, 16 bits. + + // Benchmarks show put performs better than putShort here (for HotSpot). + out.put(outIx++, (byte) (0xE0 | (c >>> 12))); + out.put(outIx++, (byte) (0x80 | (0x3F & (c >>> 6)))); + out.put(outIx, (byte) (0x80 | (0x3F & c))); + } else { + // Four bytes (1111 0xxx 10xx xxxx 10xx xxxx 10xx xxxx) + + // Minimum code point represented by a surrogate pair is 0x10000, 17 bits, four UTF-8 + // bytes + final char low; + if (inIx + 1 == inLength || !isSurrogatePair(c, (low = in.charAt(++inIx)))) { + throw new UnpairedSurrogateException(inIx, inLength); + } + // TODO(nathanmittler): Consider using putInt() to improve performance.
+ int codePoint = toCodePoint(c, low); + out.put(outIx++, (byte) ((0xF << 4) | (codePoint >>> 18))); + out.put(outIx++, (byte) (0x80 | (0x3F & (codePoint >>> 12)))); + out.put(outIx++, (byte) (0x80 | (0x3F & (codePoint >>> 6)))); + out.put(outIx, (byte) (0x80 | (0x3F & codePoint))); + } + } + + // Successfully encoded the entire string. + out.position(outIx); + } catch (IndexOutOfBoundsException e) { + // TODO(nathanmittler): Consider making the API throw IndexOutOfBoundsException instead. + + // If we failed in the outer ASCII loop, outIx will not have been updated. In this case, + // use inIx to determine the bad write index. + int badWriteIndex = out.position() + Math.max(inIx, outIx - out.position() + 1); + throw new ArrayIndexOutOfBoundsException( + "Failed writing " + in.charAt(inIx) + " at index " + badWriteIndex); } } - return j; } - // End Guava UTF-8 methods. + + /** + * {@link Processor} implementation that does not use any {@code sun.misc.Unsafe} methods. + */ + static final class SafeProcessor extends Processor { + @Override + int partialIsValidUtf8(int state, byte[] bytes, int index, int limit) { + if (state != COMPLETE) { + // The previous decoding operation was incomplete (or malformed). + // We look for a well-formed sequence consisting of bytes from + // the previous decoding operation (stored in state) together + // with bytes from the array slice. + // + // We expect such "straddler characters" to be rare. + + if (index >= limit) { // No bytes? No progress. + return state; + } + int byte1 = (byte) state; + // byte1 is never ASCII. + if (byte1 < (byte) 0xE0) { + // two-byte form + + // Simultaneously checks for illegal trailing-byte in + // leading position and overlong 2-byte form. 
+ if (byte1 < (byte) 0xC2 + // byte2 trailing-byte test + || bytes[index++] > (byte) 0xBF) { + return MALFORMED; + } + } else if (byte1 < (byte) 0xF0) { + // three-byte form + + // Get byte2 from saved state or array + int byte2 = (byte) ~(state >> 8); + if (byte2 == 0) { + byte2 = bytes[index++]; + if (index >= limit) { + return incompleteStateFor(byte1, byte2); + } + } + if (byte2 > (byte) 0xBF + // overlong? 5 most significant bits must not all be zero + || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0) + // illegal surrogate codepoint? + || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0) + // byte3 trailing-byte test + || bytes[index++] > (byte) 0xBF) { + return MALFORMED; + } + } else { + // four-byte form + + // Get byte2 and byte3 from saved state or array + int byte2 = (byte) ~(state >> 8); + int byte3 = 0; + if (byte2 == 0) { + byte2 = bytes[index++]; + if (index >= limit) { + return incompleteStateFor(byte1, byte2); + } + } else { + byte3 = (byte) (state >> 16); + } + if (byte3 == 0) { + byte3 = bytes[index++]; + if (index >= limit) { + return incompleteStateFor(byte1, byte2, byte3); + } + } + + // If we were called with state == MALFORMED, then byte1 is 0xFF, + // which never occurs in well-formed UTF-8, and so we will return + // MALFORMED again below. + + if (byte2 > (byte) 0xBF + // Check that 1 <= plane <= 16. Tricky optimized form of: + // if (byte1 > (byte) 0xF4 || + // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 || + // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F) + || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0 + // byte3 trailing-byte test + || byte3 > (byte) 0xBF + // byte4 trailing-byte test + || bytes[index++] > (byte) 0xBF) { + return MALFORMED; + } + } + } + + return partialIsValidUtf8(bytes, index, limit); + } + + @Override + int partialIsValidUtf8Direct(int state, ByteBuffer buffer, int index, int limit) { + // For safe processing, we have to use the ByteBuffer API. 
+ return partialIsValidUtf8Default(state, buffer, index, limit); + } + + @Override + int encodeUtf8(CharSequence in, byte[] out, int offset, int length) { + int utf16Length = in.length(); + int j = offset; + int i = 0; + int limit = offset + length; + // Designed to take advantage of + // https://wikis.oracle.com/display/HotSpotInternals/RangeCheckElimination + for (char c; i < utf16Length && i + j < limit && (c = in.charAt(i)) < 0x80; i++) { + out[j + i] = (byte) c; + } + if (i == utf16Length) { + return j + utf16Length; + } + j += i; + for (char c; i < utf16Length; i++) { + c = in.charAt(i); + if (c < 0x80 && j < limit) { + out[j++] = (byte) c; + } else if (c < 0x800 && j <= limit - 2) { // 11 bits, two UTF-8 bytes + out[j++] = (byte) ((0xF << 6) | (c >>> 6)); + out[j++] = (byte) (0x80 | (0x3F & c)); + } else if ((c < Character.MIN_SURROGATE || Character.MAX_SURROGATE < c) && j <= limit - 3) { + // Maximum single-char code point is 0xFFFF, 16 bits, three UTF-8 bytes + out[j++] = (byte) ((0xF << 5) | (c >>> 12)); + out[j++] = (byte) (0x80 | (0x3F & (c >>> 6))); + out[j++] = (byte) (0x80 | (0x3F & c)); + } else if (j <= limit - 4) { + // Minimum code point represented by a surrogate pair is 0x10000, 17 bits, + // four UTF-8 bytes + final char low; + if (i + 1 == in.length() + || !Character.isSurrogatePair(c, (low = in.charAt(++i)))) { + throw new UnpairedSurrogateException((i - 1), utf16Length); + } + int codePoint = Character.toCodePoint(c, low); + out[j++] = (byte) ((0xF << 4) | (codePoint >>> 18)); + out[j++] = (byte) (0x80 | (0x3F & (codePoint >>> 12))); + out[j++] = (byte) (0x80 | (0x3F & (codePoint >>> 6))); + out[j++] = (byte) (0x80 | (0x3F & codePoint)); + } else { + // If we are surrogates and we're not a surrogate pair, always throw an + // UnpairedSurrogateException instead of an ArrayOutOfBoundsException. 
+ if ((Character.MIN_SURROGATE <= c && c <= Character.MAX_SURROGATE) + && (i + 1 == in.length() + || !Character.isSurrogatePair(c, in.charAt(i + 1)))) { + throw new UnpairedSurrogateException(i, utf16Length); + } + throw new ArrayIndexOutOfBoundsException("Failed writing " + c + " at index " + j); + } + } + return j; + } + + @Override + void encodeUtf8Direct(CharSequence in, ByteBuffer out) { + // For safe processing, we have to use the ByteBuffer API. + encodeUtf8Default(in, out); + } + + private static int partialIsValidUtf8(byte[] bytes, int index, int limit) { + // Optimize for 100% ASCII (Hotspot loves small simple top-level loops like this). + // This simple loop stops when we encounter a byte >= 0x80 (i.e. non-ASCII). + while (index < limit && bytes[index] >= 0) { + index++; + } + + return (index >= limit) ? COMPLETE : partialIsValidUtf8NonAscii(bytes, index, limit); + } + + private static int partialIsValidUtf8NonAscii(byte[] bytes, int index, int limit) { + for (;;) { + int byte1, byte2; + + // Optimize for interior runs of ASCII bytes. + do { + if (index >= limit) { + return COMPLETE; + } + } while ((byte1 = bytes[index++]) >= 0); + + if (byte1 < (byte) 0xE0) { + // two-byte form + + if (index >= limit) { + // Incomplete sequence + return byte1; + } + + // Simultaneously checks for illegal trailing-byte in + // leading position and overlong 2-byte form. + if (byte1 < (byte) 0xC2 + || bytes[index++] > (byte) 0xBF) { + return MALFORMED; + } + } else if (byte1 < (byte) 0xF0) { + // three-byte form + + if (index >= limit - 1) { // incomplete sequence + return incompleteStateFor(bytes, index, limit); + } + if ((byte2 = bytes[index++]) > (byte) 0xBF + // overlong? 
5 most significant bits must not all be zero + || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0) + // check for illegal surrogate codepoints + || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0) + // byte3 trailing-byte test + || bytes[index++] > (byte) 0xBF) { + return MALFORMED; + } + } else { + // four-byte form + + if (index >= limit - 2) { // incomplete sequence + return incompleteStateFor(bytes, index, limit); + } + if ((byte2 = bytes[index++]) > (byte) 0xBF + // Check that 1 <= plane <= 16. Tricky optimized form of: + // if (byte1 > (byte) 0xF4 || + // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 || + // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F) + || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0 + // byte3 trailing-byte test + || bytes[index++] > (byte) 0xBF + // byte4 trailing-byte test + || bytes[index++] > (byte) 0xBF) { + return MALFORMED; + } + } + } + } + } + + /** + * {@link Processor} that uses {@code sun.misc.Unsafe} where possible to improve performance. + */ + static final class UnsafeProcessor extends Processor { + private static final sun.misc.Unsafe UNSAFE = getUnsafe(); + private static final long BUFFER_ADDRESS_OFFSET = + fieldOffset(field(Buffer.class, "address")); + private static final int ARRAY_BASE_OFFSET = byteArrayBaseOffset(); + + /** + * We only use Unsafe operations if we have access to direct {@link ByteBuffer}'s address + * and the array base offset is a multiple of 8 (needed by Unsafe.getLong()). + */ + private static final boolean AVAILABLE = + BUFFER_ADDRESS_OFFSET != -1 && ARRAY_BASE_OFFSET % 8 == 0; + + /** + * Indicates whether or not all required unsafe operations are supported on this platform. 
+ */ + static boolean isAvailable() { + return AVAILABLE; + } + + @Override + int partialIsValidUtf8(int state, byte[] bytes, final int index, final int limit) { + if ((index | limit | bytes.length - limit) < 0) { + throw new ArrayIndexOutOfBoundsException( + String.format("Array length=%d, index=%d, limit=%d", bytes.length, index, limit)); + } + long offset = ARRAY_BASE_OFFSET + index; + final long offsetLimit = ARRAY_BASE_OFFSET + limit; + if (state != COMPLETE) { + // The previous decoding operation was incomplete (or malformed). + // We look for a well-formed sequence consisting of bytes from + // the previous decoding operation (stored in state) together + // with bytes from the array slice. + // + // We expect such "straddler characters" to be rare. + + if (offset >= offsetLimit) { // No bytes? No progress. + return state; + } + int byte1 = (byte) state; + // byte1 is never ASCII. + if (byte1 < (byte) 0xE0) { + // two-byte form + + // Simultaneously checks for illegal trailing-byte in + // leading position and overlong 2-byte form. + if (byte1 < (byte) 0xC2 + // byte2 trailing-byte test + || UNSAFE.getByte(bytes, offset++) > (byte) 0xBF) { + return MALFORMED; + } + } else if (byte1 < (byte) 0xF0) { + // three-byte form + + // Get byte2 from saved state or array + int byte2 = (byte) ~(state >> 8); + if (byte2 == 0) { + byte2 = UNSAFE.getByte(bytes, offset++); + if (offset >= offsetLimit) { + return incompleteStateFor(byte1, byte2); + } + } + if (byte2 > (byte) 0xBF + // overlong? 5 most significant bits must not all be zero + || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0) + // illegal surrogate codepoint? 
+ || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0) + // byte3 trailing-byte test + || UNSAFE.getByte(bytes, offset++) > (byte) 0xBF) { + return MALFORMED; + } + } else { + // four-byte form + + // Get byte2 and byte3 from saved state or array + int byte2 = (byte) ~(state >> 8); + int byte3 = 0; + if (byte2 == 0) { + byte2 = UNSAFE.getByte(bytes, offset++); + if (offset >= offsetLimit) { + return incompleteStateFor(byte1, byte2); + } + } else { + byte3 = (byte) (state >> 16); + } + if (byte3 == 0) { + byte3 = UNSAFE.getByte(bytes, offset++); + if (offset >= offsetLimit) { + return incompleteStateFor(byte1, byte2, byte3); + } + } + + // If we were called with state == MALFORMED, then byte1 is 0xFF, + // which never occurs in well-formed UTF-8, and so we will return + // MALFORMED again below. + + if (byte2 > (byte) 0xBF + // Check that 1 <= plane <= 16. Tricky optimized form of: + // if (byte1 > (byte) 0xF4 || + // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 || + // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F) + || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0 + // byte3 trailing-byte test + || byte3 > (byte) 0xBF + // byte4 trailing-byte test + || UNSAFE.getByte(bytes, offset++) > (byte) 0xBF) { + return MALFORMED; + } + } + } + + return partialIsValidUtf8(bytes, offset, (int) (offsetLimit - offset)); + } + + @Override + int partialIsValidUtf8Direct( + final int state, ByteBuffer buffer, final int index, final int limit) { + if ((index | limit | buffer.limit() - limit) < 0) { + throw new ArrayIndexOutOfBoundsException( + String.format("buffer limit=%d, index=%d, limit=%d", buffer.limit(), index, limit)); + } + long address = addressOffset(buffer) + index; + final long addressLimit = address + (limit - index); + if (state != COMPLETE) { + // The previous decoding operation was incomplete (or malformed). 
+ // We look for a well-formed sequence consisting of bytes from + // the previous decoding operation (stored in state) together + // with bytes from the array slice. + // + // We expect such "straddler characters" to be rare. + + if (address >= addressLimit) { // No bytes? No progress. + return state; + } + + final int byte1 = (byte) state; + // byte1 is never ASCII. + if (byte1 < (byte) 0xE0) { + // two-byte form + + // Simultaneously checks for illegal trailing-byte in + // leading position and overlong 2-byte form. + if (byte1 < (byte) 0xC2 + // byte2 trailing-byte test + || UNSAFE.getByte(address++) > (byte) 0xBF) { + return MALFORMED; + } + } else if (byte1 < (byte) 0xF0) { + // three-byte form + + // Get byte2 from saved state or array + int byte2 = (byte) ~(state >> 8); + if (byte2 == 0) { + byte2 = UNSAFE.getByte(address++); + if (address >= addressLimit) { + return incompleteStateFor(byte1, byte2); + } + } + if (byte2 > (byte) 0xBF + // overlong? 5 most significant bits must not all be zero + || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0) + // illegal surrogate codepoint? + || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0) + // byte3 trailing-byte test + || UNSAFE.getByte(address++) > (byte) 0xBF) { + return MALFORMED; + } + } else { + // four-byte form + + // Get byte2 and byte3 from saved state or array + int byte2 = (byte) ~(state >> 8); + int byte3 = 0; + if (byte2 == 0) { + byte2 = UNSAFE.getByte(address++); + if (address >= addressLimit) { + return incompleteStateFor(byte1, byte2); + } + } else { + byte3 = (byte) (state >> 16); + } + if (byte3 == 0) { + byte3 = UNSAFE.getByte(address++); + if (address >= addressLimit) { + return incompleteStateFor(byte1, byte2, byte3); + } + } + + // If we were called with state == MALFORMED, then byte1 is 0xFF, + // which never occurs in well-formed UTF-8, and so we will return + // MALFORMED again below. + + if (byte2 > (byte) 0xBF + // Check that 1 <= plane <= 16. 
Tricky optimized form of: + // if (byte1 > (byte) 0xF4 || + // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 || + // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F) + || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0 + // byte3 trailing-byte test + || byte3 > (byte) 0xBF + // byte4 trailing-byte test + || UNSAFE.getByte(address++) > (byte) 0xBF) { + return MALFORMED; + } + } + } + + return partialIsValidUtf8(address, (int) (addressLimit - address)); + } + + @Override + int encodeUtf8(final CharSequence in, final byte[] out, final int offset, final int length) { + long outIx = ARRAY_BASE_OFFSET + offset; + final long outLimit = outIx + length; + final int inLimit = in.length(); + if (inLimit > length || out.length - length < offset) { + // Not even enough room for an ASCII-encoded string. + throw new ArrayIndexOutOfBoundsException( + "Failed writing " + in.charAt(inLimit - 1) + " at index " + (offset + length)); + } + + // Designed to take advantage of + // https://wikis.oracle.com/display/HotSpotInternals/RangeCheckElimination + int inIx = 0; + for (char c; inIx < inLimit && (c = in.charAt(inIx)) < 0x80; ++inIx) { + UNSAFE.putByte(out, outIx++, (byte) c); + } + if (inIx == inLimit) { + // We're done, it was ASCII encoded. 
+ return (int) (outIx - ARRAY_BASE_OFFSET); + } + + for (char c; inIx < inLimit; ++inIx) { + c = in.charAt(inIx); + if (c < 0x80 && outIx < outLimit) { + UNSAFE.putByte(out, outIx++, (byte) c); + } else if (c < 0x800 && outIx <= outLimit - 2L) { // 11 bits, two UTF-8 bytes + UNSAFE.putByte(out, outIx++, (byte) ((0xF << 6) | (c >>> 6))); + UNSAFE.putByte(out, outIx++, (byte) (0x80 | (0x3F & c))); + } else if ((c < MIN_SURROGATE || MAX_SURROGATE < c) && outIx <= outLimit - 3L) { + // Maximum single-char code point is 0xFFFF, 16 bits, three UTF-8 bytes + UNSAFE.putByte(out, outIx++, (byte) ((0xF << 5) | (c >>> 12))); + UNSAFE.putByte(out, outIx++, (byte) (0x80 | (0x3F & (c >>> 6)))); + UNSAFE.putByte(out, outIx++, (byte) (0x80 | (0x3F & c))); + } else if (outIx <= outLimit - 4L) { + // Minimum code point represented by a surrogate pair is 0x10000, 17 bits, four UTF-8 + // bytes + final char low; + if (inIx + 1 == inLimit || !isSurrogatePair(c, (low = in.charAt(++inIx)))) { + throw new UnpairedSurrogateException((inIx - 1), inLimit); + } + int codePoint = toCodePoint(c, low); + UNSAFE.putByte(out, outIx++, (byte) ((0xF << 4) | (codePoint >>> 18))); + UNSAFE.putByte(out, outIx++, (byte) (0x80 | (0x3F & (codePoint >>> 12)))); + UNSAFE.putByte(out, outIx++, (byte) (0x80 | (0x3F & (codePoint >>> 6)))); + UNSAFE.putByte(out, outIx++, (byte) (0x80 | (0x3F & codePoint))); + } else { + if ((MIN_SURROGATE <= c && c <= MAX_SURROGATE) + && (inIx + 1 == inLimit || !isSurrogatePair(c, in.charAt(inIx + 1)))) { + // We are surrogates and we're not a surrogate pair. + throw new UnpairedSurrogateException(inIx, inLimit); + } + // Not enough space in the output buffer. + throw new ArrayIndexOutOfBoundsException("Failed writing " + c + " at index " + outIx); + } + } + + // All bytes have been encoded. 
+ return (int) (outIx - ARRAY_BASE_OFFSET); + } + + @Override + void encodeUtf8Direct(CharSequence in, ByteBuffer out) { + final long address = addressOffset(out); + long outIx = address + out.position(); + final long outLimit = address + out.limit(); + final int inLimit = in.length(); + if (inLimit > outLimit - outIx) { + // Not even enough room for an ASCII-encoded string. + throw new ArrayIndexOutOfBoundsException( + "Failed writing " + in.charAt(inLimit - 1) + " at index " + out.limit()); + } + + // Designed to take advantage of + // https://wikis.oracle.com/display/HotSpotInternals/RangeCheckElimination + int inIx = 0; + for (char c; inIx < inLimit && (c = in.charAt(inIx)) < 0x80; ++inIx) { + UNSAFE.putByte(outIx++, (byte) c); + } + if (inIx == inLimit) { + // We're done, it was ASCII encoded. + out.position((int) (outIx - address)); + return; + } + + for (char c; inIx < inLimit; ++inIx) { + c = in.charAt(inIx); + if (c < 0x80 && outIx < outLimit) { + UNSAFE.putByte(outIx++, (byte) c); + } else if (c < 0x800 && outIx <= outLimit - 2L) { // 11 bits, two UTF-8 bytes + UNSAFE.putByte(outIx++, (byte) ((0xF << 6) | (c >>> 6))); + UNSAFE.putByte(outIx++, (byte) (0x80 | (0x3F & c))); + } else if ((c < MIN_SURROGATE || MAX_SURROGATE < c) && outIx <= outLimit - 3L) { + // Maximum single-char code point is 0xFFFF, 16 bits, three UTF-8 bytes + UNSAFE.putByte(outIx++, (byte) ((0xF << 5) | (c >>> 12))); + UNSAFE.putByte(outIx++, (byte) (0x80 | (0x3F & (c >>> 6)))); + UNSAFE.putByte(outIx++, (byte) (0x80 | (0x3F & c))); + } else if (outIx <= outLimit - 4L) { + // Minimum code point represented by a surrogate pair is 0x10000, 17 bits, four UTF-8 + // bytes + final char low; + if (inIx + 1 == inLimit || !isSurrogatePair(c, (low = in.charAt(++inIx)))) { + throw new UnpairedSurrogateException((inIx - 1), inLimit); + } + int codePoint = toCodePoint(c, low); + UNSAFE.putByte(outIx++, (byte) ((0xF << 4) | (codePoint >>> 18))); + UNSAFE.putByte(outIx++, (byte) (0x80 | (0x3F & 
(codePoint >>> 12)))); + UNSAFE.putByte(outIx++, (byte) (0x80 | (0x3F & (codePoint >>> 6)))); + UNSAFE.putByte(outIx++, (byte) (0x80 | (0x3F & codePoint))); + } else { + if ((MIN_SURROGATE <= c && c <= MAX_SURROGATE) + && (inIx + 1 == inLimit || !isSurrogatePair(c, in.charAt(inIx + 1)))) { + // We are surrogates and we're not a surrogate pair. + throw new UnpairedSurrogateException(inIx, inLimit); + } + // Not enough space in the output buffer. + throw new ArrayIndexOutOfBoundsException("Failed writing " + c + " at index " + outIx); + } + } + + // All bytes have been encoded. + out.position((int) (outIx - address)); + } + + /** + * Counts (approximately) the number of consecutive ASCII characters starting from the given + * position, using the most efficient method available to the platform. + * + * @param bytes the array containing the character sequence + * @param offset the offset position of the index (same as index + arrayBaseOffset) + * @param maxChars the maximum number of characters to count + * @return the number of ASCII characters found. The stopping position will be at or + * before the first non-ASCII byte. + */ + private static int unsafeEstimateConsecutiveAscii( + byte[] bytes, long offset, final int maxChars) { + int remaining = maxChars; + if (remaining < UNSAFE_COUNT_ASCII_THRESHOLD) { + // Don't bother with small strings. + return 0; + } + + // Read bytes until 8-byte aligned so that we can read longs in the loop below. + // Byte arrays are already either 8 or 16-byte aligned, so we just need to make sure that + // the index (relative to the start of the array) is also 8-byte aligned. We do this by + // ANDing the index with 7 to determine the number of bytes that need to be read before + // we're 8-byte aligned. + final int unaligned = (int) offset & 7; + for (int j = unaligned; j > 0; j--) { + if (UNSAFE.getByte(bytes, offset++) < 0) { + return unaligned - j; + } + } + + // This simple loop stops when we encounter a byte >= 0x80 (i.e. 
non-ASCII). + // To speed things up further, we're reading longs instead of bytes so we use a mask to + // determine if any byte in the current long is non-ASCII. + remaining -= unaligned; + for (; remaining >= 8 && (UNSAFE.getLong(bytes, offset) & ASCII_MASK_LONG) == 0; + offset += 8, remaining -= 8) {} + return maxChars - remaining; + } + + /** + * Same as {@link Utf8#estimateConsecutiveAscii(ByteBuffer, int, int)} except that it uses the + * most efficient method available to the platform. + */ + private static int unsafeEstimateConsecutiveAscii(long address, final int maxChars) { + int remaining = maxChars; + if (remaining < UNSAFE_COUNT_ASCII_THRESHOLD) { + // Don't bother with small strings. + return 0; + } + + // Read bytes until 8-byte aligned so that we can read longs in the loop below. + // We do this by ANDing the address with 7 to determine the number of bytes that need to + // be read before we're 8-byte aligned. + final int unaligned = (int) address & 7; + for (int j = unaligned; j > 0; j--) { + if (UNSAFE.getByte(address++) < 0) { + return unaligned - j; + } + } + + // This simple loop stops when we encounter a byte >= 0x80 (i.e. non-ASCII). + // To speed things up further, we're reading longs instead of bytes so we use a mask to + // determine if any byte in the current long is non-ASCII. + remaining -= unaligned; + for (; remaining >= 8 && (UNSAFE.getLong(address) & ASCII_MASK_LONG) == 0; + address += 8, remaining -= 8) {} + return maxChars - remaining; + } + + private static int partialIsValidUtf8(final byte[] bytes, long offset, int remaining) { + // Skip past ASCII characters as quickly as possible. + final int skipped = unsafeEstimateConsecutiveAscii(bytes, offset, remaining); + remaining -= skipped; + offset += skipped; + + for (;;) { + // Optimize for interior runs of ASCII bytes. + // TODO(nathanmittler): Consider checking 8 bytes at a time after some threshold? + // Maybe after seeing a few in a row that are ASCII, go back to fast mode? 
+ int byte1 = 0; + for (; remaining > 0 && (byte1 = UNSAFE.getByte(bytes, offset++)) >= 0; --remaining) { + } + if (remaining == 0) { + return COMPLETE; + } + remaining--; + + // If we're here byte1 is not ASCII. Only need to handle 2-4 byte forms. + if (byte1 < (byte) 0xE0) { + // Two-byte form (110xxxxx 10xxxxxx) + if (remaining == 0) { + // Incomplete sequence + return byte1; + } + remaining--; + + // Simultaneously checks for illegal trailing-byte in + // leading position and overlong 2-byte form. + if (byte1 < (byte) 0xC2 + || UNSAFE.getByte(bytes, offset++) > (byte) 0xBF) { + return MALFORMED; + } + } else if (byte1 < (byte) 0xF0) { + // Three-byte form (1110xxxx 10xxxxxx 10xxxxxx) + if (remaining < 2) { + // Incomplete sequence + return unsafeIncompleteStateFor(bytes, byte1, offset, remaining); + } + remaining -= 2; + + final int byte2; + if ((byte2 = UNSAFE.getByte(bytes, offset++)) > (byte) 0xBF + // overlong? 5 most significant bits must not all be zero + || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0) + // check for illegal surrogate codepoints + || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0) + // byte3 trailing-byte test + || UNSAFE.getByte(bytes, offset++) > (byte) 0xBF) { + return MALFORMED; + } + } else { + // Four-byte form (1110xxxx 10xxxxxx 10xxxxxx 10xxxxxx) + if (remaining < 3) { + // Incomplete sequence + return unsafeIncompleteStateFor(bytes, byte1, offset, remaining); + } + remaining -= 3; + + final int byte2; + if ((byte2 = UNSAFE.getByte(bytes, offset++)) > (byte) 0xBF + // Check that 1 <= plane <= 16. 
Tricky optimized form of: + // if (byte1 > (byte) 0xF4 || + // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 || + // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F) + || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0 + // byte3 trailing-byte test + || UNSAFE.getByte(bytes, offset++) > (byte) 0xBF + // byte4 trailing-byte test + || UNSAFE.getByte(bytes, offset++) > (byte) 0xBF) { + return MALFORMED; + } + } + } + } + + private static int partialIsValidUtf8(long address, int remaining) { + // Skip past ASCII characters as quickly as possible. + final int skipped = unsafeEstimateConsecutiveAscii(address, remaining); + address += skipped; + remaining -= skipped; + + for (;;) { + // Optimize for interior runs of ASCII bytes. + // TODO(nathanmittler): Consider checking 8 bytes at a time after some threshold? + // Maybe after seeing a few in a row that are ASCII, go back to fast mode? + int byte1 = 0; + for (; remaining > 0 && (byte1 = UNSAFE.getByte(address++)) >= 0; --remaining) { + } + if (remaining == 0) { + return COMPLETE; + } + remaining--; + + if (byte1 < (byte) 0xE0) { + // Two-byte form + + if (remaining == 0) { + // Incomplete sequence + return byte1; + } + remaining--; + + // Simultaneously checks for illegal trailing-byte in + // leading position and overlong 2-byte form. + if (byte1 < (byte) 0xC2 || UNSAFE.getByte(address++) > (byte) 0xBF) { + return MALFORMED; + } + } else if (byte1 < (byte) 0xF0) { + // Three-byte form + + if (remaining < 2) { + // Incomplete sequence + return unsafeIncompleteStateFor(address, byte1, remaining); + } + remaining -= 2; + + final byte byte2 = UNSAFE.getByte(address++); + if (byte2 > (byte) 0xBF + // overlong? 
5 most significant bits must not all be zero + || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0) + // check for illegal surrogate codepoints + || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0) + // byte3 trailing-byte test + || UNSAFE.getByte(address++) > (byte) 0xBF) { + return MALFORMED; + } + } else { + // Four-byte form + + if (remaining < 3) { + // Incomplete sequence + return unsafeIncompleteStateFor(address, byte1, remaining); + } + remaining -= 3; + + final byte byte2 = UNSAFE.getByte(address++); + if (byte2 > (byte) 0xBF + // Check that 1 <= plane <= 16. Tricky optimized form of: + // if (byte1 > (byte) 0xF4 || + // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 || + // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F) + || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0 + // byte3 trailing-byte test + || UNSAFE.getByte(address++) > (byte) 0xBF + // byte4 trailing-byte test + || UNSAFE.getByte(address++) > (byte) 0xBF) { + return MALFORMED; + } + } + } + } + + private static int unsafeIncompleteStateFor(byte[] bytes, int byte1, long offset, + int remaining) { + switch (remaining) { + case 0: { + return incompleteStateFor(byte1); + } + case 1: { + return incompleteStateFor(byte1, UNSAFE.getByte(bytes, offset)); + } + case 2: { + return incompleteStateFor(byte1, UNSAFE.getByte(bytes, offset), + UNSAFE.getByte(bytes, offset + 1)); + } + default: { + throw new AssertionError(); + } + } + } + + private static int unsafeIncompleteStateFor(long address, final int byte1, int remaining) { + switch (remaining) { + case 0: { + return incompleteStateFor(byte1); + } + case 1: { + return incompleteStateFor(byte1, UNSAFE.getByte(address)); + } + case 2: { + return incompleteStateFor(byte1, UNSAFE.getByte(address), UNSAFE.getByte(address + 1)); + } + default: { + throw new AssertionError(); + } + } + } + + /** + * Gets the field with the given name within the class, or {@code null} if not found. If + * found, the field is made accessible. 
+ */ + private static Field field(Class<?> clazz, String fieldName) { + Field field; + try { + field = clazz.getDeclaredField(fieldName); + field.setAccessible(true); + } catch (Throwable t) { + // Failed to access the fields. + field = null; + } + logger.log(Level.FINEST, "{0}.{1}: {2}", + new Object[] {clazz.getName(), fieldName, (field != null ? "available" : "unavailable")}); + return field; + } + + /** + * Returns the offset of the provided field, or {@code -1} if {@code sun.misc.Unsafe} is not + * available. + */ + private static long fieldOffset(Field field) { + return field == null || UNSAFE == null ? -1 : UNSAFE.objectFieldOffset(field); + } + + /** + * Get the base offset for byte arrays, or {@code -1} if {@code sun.misc.Unsafe} is not + * available. + */ + private static <T> int byteArrayBaseOffset() { + return UNSAFE == null ? -1 : UNSAFE.arrayBaseOffset(byte[].class); + } + + /** + * Gets the offset of the {@code address} field of the given direct {@link ByteBuffer}. + */ + private static long addressOffset(ByteBuffer buffer) { + return UNSAFE.getLong(buffer, BUFFER_ADDRESS_OFFSET); + } + + /** + * Gets the {@code sun.misc.Unsafe} instance, or {@code null} if not available on this + * platform. + */ + private static sun.misc.Unsafe getUnsafe() { + sun.misc.Unsafe unsafe = null; + try { + unsafe = AccessController.doPrivileged(new PrivilegedExceptionAction<sun.misc.Unsafe>() { + @Override + public sun.misc.Unsafe run() throws Exception { + Class<sun.misc.Unsafe> k = sun.misc.Unsafe.class; + + // Check that this platform supports all of the required unsafe methods. + checkRequiredMethods(k); + + for (Field f : k.getDeclaredFields()) { + f.setAccessible(true); + Object x = f.get(null); + if (k.isInstance(x)) { + return k.cast(x); + } + } + // The sun.misc.Unsafe field does not exist. + return null; + } + }); + } catch (Throwable e) { + // Catching Throwable here due to the fact that Google AppEngine raises NoClassDefFoundError + // for Unsafe. 
+ } + + logger.log(Level.FINEST, "sun.misc.Unsafe: {}", + unsafe != null ? "available" : "unavailable"); + return unsafe; + } + + /** + * Verifies that all required methods of {@code sun.misc.Unsafe} are available on this platform. + */ + private static void checkRequiredMethods(Class<sun.misc.Unsafe> clazz) + throws NoSuchMethodException, SecurityException { + // Needed for Unsafe byte[] access + clazz.getMethod("arrayBaseOffset", Class.class); + clazz.getMethod("getByte", Object.class, long.class); + clazz.getMethod("putByte", Object.class, long.class, byte.class); + clazz.getMethod("getLong", Object.class, long.class); + + // Needed for Unsafe Direct ByteBuffer access + clazz.getMethod("objectFieldOffset", Field.class); + clazz.getMethod("getByte", long.class); + clazz.getMethod("getLong", Object.class, long.class); + clazz.getMethod("putByte", long.class, byte.class); + clazz.getMethod("getLong", long.class); + } + } + + private Utf8() {} } |