diff options
Diffstat (limited to 'java/src/main/java')
33 files changed, 4519 insertions, 629 deletions
diff --git a/java/src/main/java/com/google/protobuf/AbstractMessage.java b/java/src/main/java/com/google/protobuf/AbstractMessage.java index b9d83016..f4d115de 100644 --- a/java/src/main/java/com/google/protobuf/AbstractMessage.java +++ b/java/src/main/java/com/google/protobuf/AbstractMessage.java @@ -32,6 +32,7 @@ package com.google.protobuf; import com.google.protobuf.Descriptors.Descriptor; import com.google.protobuf.Descriptors.FieldDescriptor; +import com.google.protobuf.GeneratedMessage.ExtendableBuilder; import com.google.protobuf.Internal.EnumLite; import java.io.IOException; @@ -81,6 +82,25 @@ public abstract class AbstractMessage extends AbstractMessageLite return true; } + public List<String> findInitializationErrors() { + return Builder.findMissingFields(this); + } + + public String getInitializationErrorString() { + return delimitWithCommas(findInitializationErrors()); + } + + private static String delimitWithCommas(List<String> parts) { + StringBuilder result = new StringBuilder(); + for (String part : parts) { + if (result.length() > 0) { + result.append(", "); + } + result.append(part); + } + return result.toString(); + } + @Override public final String toString() { return TextFormat.printToString(this); @@ -209,6 +229,15 @@ public abstract class AbstractMessage extends AbstractMessageLite } /** + * Package private helper method for AbstractParser to create + * UninitializedMessageException with missing field information. + */ + @Override + UninitializedMessageException newUninitializedMessageException() { + return Builder.newUninitializedMessageException(this); + } + + /** * Helper method for implementing {@link Message#hashCode()}. * <p> * This is needed because {@link java.lang.Enum#hashCode()} is final, but we @@ -251,6 +280,14 @@ public abstract class AbstractMessage extends AbstractMessageLite return (BuilderType) this; } + public List<String> findInitializationErrors() { + return findMissingFields(this); + } + + public String getInitializationErrorString() { + return delimitWithCommas(findInitializationErrors()); + } + public BuilderType mergeFrom(final Message other) { if (other.getDescriptorForType() != getDescriptorForType()) { throw new IllegalArgumentException( @@ -314,7 +351,7 @@ public abstract class AbstractMessage extends AbstractMessageLite } if (!mergeFieldFrom(input, unknownFields, extensionRegistry, - this, tag)) { + getDescriptorForType(), this, null, tag)) { // end group tag break; } @@ -323,25 +360,93 @@ public abstract class AbstractMessage extends AbstractMessageLite return (BuilderType) this; } + /** helper method to handle {@code builder} and {@code extensions}. */ + private static void addRepeatedField( + Message.Builder builder, + FieldSet<FieldDescriptor> extensions, + FieldDescriptor field, + Object value) { + if (builder != null) { + builder.addRepeatedField(field, value); + } else { + extensions.addRepeatedField(field, value); + } + } + + /** helper method to handle {@code builder} and {@code extensions}. */ + private static void setField( + Message.Builder builder, + FieldSet<FieldDescriptor> extensions, + FieldDescriptor field, + Object value) { + if (builder != null) { + builder.setField(field, value); + } else { + extensions.setField(field, value); + } + } + + /** helper method to handle {@code builder} and {@code extensions}. */ + private static boolean hasOriginalMessage( + Message.Builder builder, + FieldSet<FieldDescriptor> extensions, + FieldDescriptor field) { + if (builder != null) { + return builder.hasField(field); + } else { + return extensions.hasField(field); + } + } + + /** helper method to handle {@code builder} and {@code extensions}. */ + private static Message getOriginalMessage( + Message.Builder builder, + FieldSet<FieldDescriptor> extensions, + FieldDescriptor field) { + if (builder != null) { + return (Message) builder.getField(field); + } else { + return (Message) extensions.getField(field); + } + } + + /** helper method to handle {@code builder} and {@code extensions}. */ + private static void mergeOriginalMessage( + Message.Builder builder, + FieldSet<FieldDescriptor> extensions, + FieldDescriptor field, + Message.Builder subBuilder) { + Message originalMessage = getOriginalMessage(builder, extensions, field); + if (originalMessage != null) { + subBuilder.mergeFrom(originalMessage); + } + } + /** - * Like {@link #mergeFrom(CodedInputStream, UnknownFieldSet.Builder, - * ExtensionRegistryLite, Message.Builder)}, but parses a single field. + * Like {@link #mergeFrom(CodedInputStream, ExtensionRegistryLite)}, but + * parses a single field. + * + * When {@code builder} is not null, the method will parse and merge the + * field into {@code builder}. Otherwise, it will try to parse the field + * into {@code extensions}, when it's called by the parsing constructor in + * generated classes. + * * Package-private because it is used by GeneratedMessage.ExtendableMessage. * @param tag The tag, which should have already been read. * @return {@code true} unless the tag is an end-group tag. */ static boolean mergeFieldFrom( - final CodedInputStream input, - final UnknownFieldSet.Builder unknownFields, - final ExtensionRegistryLite extensionRegistry, - final Message.Builder builder, - final int tag) throws IOException { - final Descriptor type = builder.getDescriptorForType(); - + CodedInputStream input, + UnknownFieldSet.Builder unknownFields, + ExtensionRegistryLite extensionRegistry, + Descriptor type, + Message.Builder builder, + FieldSet<FieldDescriptor> extensions, + int tag) throws IOException { if (type.getOptions().getMessageSetWireFormat() && tag == WireFormat.MESSAGE_SET_ITEM_TAG) { mergeMessageSetExtensionFromCodedStream( - input, unknownFields, extensionRegistry, builder); + input, unknownFields, extensionRegistry, type, builder, extensions); return true; } @@ -376,8 +481,10 @@ public abstract class AbstractMessage extends AbstractMessageLite } else { field = null; } - } else { + } else if (builder != null) { field = type.findFieldByNumber(fieldNumber); + } else { + field = null; } boolean unknown = false; @@ -413,13 +520,13 @@ public abstract class AbstractMessage extends AbstractMessageLite // enum, drop it (don't even add it to unknownFields). return true; } - builder.addRepeatedField(field, value); + addRepeatedField(builder, extensions, field, value); } } else { while (input.getBytesUntilLimit() > 0) { final Object value = FieldSet.readPrimitiveField(input, field.getLiteType()); - builder.addRepeatedField(field, value); + addRepeatedField(builder, extensions, field, value); } } input.popLimit(limit); @@ -434,10 +541,10 @@ public abstract class AbstractMessage extends AbstractMessageLite subBuilder = builder.newBuilderForField(field); } if (!field.isRepeated()) { - subBuilder.mergeFrom((Message) builder.getField(field)); + mergeOriginalMessage(builder, extensions, field, subBuilder); } input.readGroup(field.getNumber(), subBuilder, extensionRegistry); - value = subBuilder.build(); + value = subBuilder.buildPartial(); break; } case MESSAGE: { @@ -448,10 +555,10 @@ public abstract class AbstractMessage extends AbstractMessageLite subBuilder = builder.newBuilderForField(field); } if (!field.isRepeated()) { - subBuilder.mergeFrom((Message) builder.getField(field)); + mergeOriginalMessage(builder, extensions, field, subBuilder); } input.readMessage(subBuilder, extensionRegistry); - value = subBuilder.build(); + value = subBuilder.buildPartial(); break; } case ENUM: @@ -470,22 +577,28 @@ public abstract class AbstractMessage extends AbstractMessageLite } if (field.isRepeated()) { - builder.addRepeatedField(field, value); + addRepeatedField(builder, extensions, field, value); } else { - builder.setField(field, value); + setField(builder, extensions, field, value); } } return true; } - /** Called by {@code #mergeFieldFrom()} to parse a MessageSet extension. */ + /** + * Called by {@code #mergeFieldFrom()} to parse a MessageSet extension. + * If {@code builder} is not null, this method will merge MessageSet into + * the builder. Otherwise, it will merge the MessageSet into {@code + * extensions}. + */ private static void mergeMessageSetExtensionFromCodedStream( - final CodedInputStream input, - final UnknownFieldSet.Builder unknownFields, - final ExtensionRegistryLite extensionRegistry, - final Message.Builder builder) throws IOException { - final Descriptor type = builder.getDescriptorForType(); + CodedInputStream input, + UnknownFieldSet.Builder unknownFields, + ExtensionRegistryLite extensionRegistry, + Descriptor type, + Message.Builder builder, + FieldSet<FieldDescriptor> extensions) throws IOException { // The wire format for MessageSet is: // message MessageSet { @@ -504,10 +617,11 @@ public abstract class AbstractMessage extends AbstractMessageLite // should be prepared to accept them. int typeId = 0; - ByteString rawBytes = null; // If we encounter "message" before "typeId" - Message.Builder subBuilder = null; - FieldDescriptor field = null; + ByteString rawBytes = null; // If we encounter "message" before "typeId" + ExtensionRegistry.ExtensionInfo extension = null; + // Read bytes from input, if we get it's type first then parse it eagerly, + // otherwise we store the raw bytes in a local variable. while (true) { final int tag = input.readTag(); if (tag == 0) { @@ -516,75 +630,121 @@ public abstract class AbstractMessage extends AbstractMessageLite if (tag == WireFormat.MESSAGE_SET_TYPE_ID_TAG) { typeId = input.readUInt32(); - // Zero is not a valid type ID. if (typeId != 0) { - final ExtensionRegistry.ExtensionInfo extension; - // extensionRegistry may be either ExtensionRegistry or - // ExtensionRegistryLite. Since the type we are parsing is a full + // ExtensionRegistryLite. Since the type we are parsing is a full // message, only a full ExtensionRegistry could possibly contain - // extensions of it. Otherwise we will treat the registry as if it + // extensions of it. Otherwise we will treat the registry as if it // were empty. if (extensionRegistry instanceof ExtensionRegistry) { extension = ((ExtensionRegistry) extensionRegistry) .findExtensionByNumber(type, typeId); - } else { - extension = null; - } - - if (extension != null) { - field = extension.descriptor; - subBuilder = extension.defaultInstance.newBuilderForType(); - final Message originalMessage = (Message)builder.getField(field); - if (originalMessage != null) { - subBuilder.mergeFrom(originalMessage); - } - if (rawBytes != null) { - // We already encountered the message. Parse it now. - subBuilder.mergeFrom( - CodedInputStream.newInstance(rawBytes.newInput())); - rawBytes = null; - } - } else { - // Unknown extension number. If we already saw data, put it - // in rawBytes. - if (rawBytes != null) { - unknownFields.mergeField(typeId, - UnknownFieldSet.Field.newBuilder() - .addLengthDelimited(rawBytes) - .build()); - rawBytes = null; - } } } + } else if (tag == WireFormat.MESSAGE_SET_MESSAGE_TAG) { - if (typeId == 0) { - // We haven't seen a type ID yet, so we have to store the raw bytes - // for now. - rawBytes = input.readBytes(); - } else if (subBuilder == null) { - // We don't know how to parse this. Ignore it. - unknownFields.mergeField(typeId, - UnknownFieldSet.Field.newBuilder() - .addLengthDelimited(input.readBytes()) - .build()); - } else { - // We already know the type, so we can parse directly from the input - // with no copying. Hooray! - input.readMessage(subBuilder, extensionRegistry); + if (typeId != 0) { + if (extension != null && ExtensionRegistryLite.isEagerlyParseMessageSets()) { + // We already know the type, so we can parse directly from the + // input with no copying. Hooray! + eagerlyMergeMessageSetExtension( + input, extension, extensionRegistry, builder, extensions); + rawBytes = null; + continue; + } } - } else { - // Unknown tag. Skip it. + // We haven't seen a type ID yet or we want parse message lazily. + rawBytes = input.readBytes(); + + } else { // Unknown tag. Skip it. if (!input.skipField(tag)) { - break; // end of group + break; // End of group } } } - input.checkLastTagWas(WireFormat.MESSAGE_SET_ITEM_END_TAG); - if (subBuilder != null) { - builder.setField(field, subBuilder.build()); + // Process the raw bytes. + if (rawBytes != null && typeId != 0) { // Zero is not a valid type ID. + if (extension != null) { // We known the type + mergeMessageSetExtensionFromBytes( + rawBytes, extension, extensionRegistry, builder, extensions); + } else { // We don't know how to parse this. Ignore it. + if (rawBytes != null) { + unknownFields.mergeField(typeId, UnknownFieldSet.Field.newBuilder() + .addLengthDelimited(rawBytes).build()); + } + } + } + } + + private static void eagerlyMergeMessageSetExtension( + CodedInputStream input, + ExtensionRegistry.ExtensionInfo extension, + ExtensionRegistryLite extensionRegistry, + Message.Builder builder, + FieldSet<FieldDescriptor> extensions) throws IOException { + + FieldDescriptor field = extension.descriptor; + Message value = null; + if (hasOriginalMessage(builder, extensions, field)) { + Message originalMessage = + getOriginalMessage(builder, extensions, field); + Message.Builder subBuilder = originalMessage.toBuilder(); + input.readMessage(subBuilder, extensionRegistry); + value = subBuilder.buildPartial(); + } else { + value = input.readMessage(extension.defaultInstance.getParserForType(), + extensionRegistry); + } + + if (builder != null) { + builder.setField(field, value); + } else { + extensions.setField(field, value); + } + } + + private static void mergeMessageSetExtensionFromBytes( + ByteString rawBytes, + ExtensionRegistry.ExtensionInfo extension, + ExtensionRegistryLite extensionRegistry, + Message.Builder builder, + FieldSet<FieldDescriptor> extensions) throws IOException { + + FieldDescriptor field = extension.descriptor; + boolean hasOriginalValue = hasOriginalMessage(builder, extensions, field); + + if (hasOriginalValue || ExtensionRegistryLite.isEagerlyParseMessageSets()) { + // If the field already exists, we just parse the field. + Message value = null; + if (hasOriginalValue) { + Message originalMessage = + getOriginalMessage(builder, extensions, field); + Message.Builder subBuilder= originalMessage.toBuilder(); + subBuilder.mergeFrom(rawBytes, extensionRegistry); + value = subBuilder.buildPartial(); + } else { + value = extension.defaultInstance.getParserForType() + .parsePartialFrom(rawBytes, extensionRegistry); + } + setField(builder, extensions, field, value); + } else { + // Use LazyField to load MessageSet lazily. + LazyField lazyField = new LazyField( + extension.defaultInstance, extensionRegistry, rawBytes); + if (builder != null) { + // TODO(xiangl): it looks like this method can only be invoked by + // ExtendableBuilder, but I'm not sure. So I double check the type of + // builder here. It may be useless and need more investigation. + if (builder instanceof ExtendableBuilder) { + builder.setField(field, lazyField); + } else { + builder.setField(field, lazyField.getValue()); + } + } else { + extensions.setField(field, lazyField); + } } } @@ -596,6 +756,11 @@ public abstract class AbstractMessage extends AbstractMessageLite return (BuilderType) this; } + public Message.Builder getFieldBuilder(final FieldDescriptor field) { + throw new UnsupportedOperationException( + "getFieldBuilder() called on an unsupported message type."); + } + /** * Construct an UninitializedMessageException reporting missing fields in * the given message. @@ -609,14 +774,15 @@ public abstract class AbstractMessage extends AbstractMessageLite * Populates {@code this.missingFields} with the full "path" of each * missing required field in the given message. */ - private static List<String> findMissingFields(final Message message) { + private static List<String> findMissingFields( + final MessageOrBuilder message) { final List<String> results = new ArrayList<String>(); findMissingFields(message, "", results); return results; } /** Recursive helper implementing {@link #findMissingFields(Message)}. */ - private static void findMissingFields(final Message message, + private static void findMissingFields(final MessageOrBuilder message, final String prefix, final List<String> results) { for (final FieldDescriptor field : @@ -635,13 +801,13 @@ public abstract class AbstractMessage extends AbstractMessageLite if (field.isRepeated()) { int i = 0; for (final Object element : (List) value) { - findMissingFields((Message) element, + findMissingFields((MessageOrBuilder) element, subMessagePrefix(prefix, field, i++), results); } } else { if (message.hasField(field)) { - findMissingFields((Message) value, + findMissingFields((MessageOrBuilder) value, subMessagePrefix(prefix, field, -1), results); } diff --git a/java/src/main/java/com/google/protobuf/AbstractMessageLite.java b/java/src/main/java/com/google/protobuf/AbstractMessageLite.java index 77b27370..9926f3db 100644 --- a/java/src/main/java/com/google/protobuf/AbstractMessageLite.java +++ b/java/src/main/java/com/google/protobuf/AbstractMessageLite.java @@ -92,6 +92,14 @@ public abstract class AbstractMessageLite implements MessageLite { } /** + * Package private helper method for AbstractParser to create + * UninitializedMessageException. + */ + UninitializedMessageException newUninitializedMessageException() { + return new UninitializedMessageException(this); + } + + /** * A partial implementation of the {@link Message.Builder} interface which * implements as many methods of that interface as possible in terms of * other methods. @@ -307,10 +315,12 @@ public abstract class AbstractMessageLite implements MessageLite { */ protected static <T> void addAll(final Iterable<T> values, final Collection<? super T> list) { - for (final T value : values) { - if (value == null) { - throw new NullPointerException(); - } + if (values instanceof LazyStringList) { + // For StringOrByteStringLists, check the underlying elements to avoid + // forcing conversions of ByteStrings to Strings. + checkForNullValues(((LazyStringList) values).getUnderlyingElements()); + } else { + checkForNullValues(values); } if (values instanceof Collection) { final Collection<T> collection = (Collection<T>) values; @@ -321,5 +331,13 @@ public abstract class AbstractMessageLite implements MessageLite { } } } + + private static void checkForNullValues(final Iterable<?> values) { + for (final Object value : values) { + if (value == null) { + throw new NullPointerException(); + } + } + } } } diff --git a/java/src/main/java/com/google/protobuf/AbstractParser.java b/java/src/main/java/com/google/protobuf/AbstractParser.java new file mode 100644 index 00000000..9bd9d397 --- /dev/null +++ b/java/src/main/java/com/google/protobuf/AbstractParser.java @@ -0,0 +1,261 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// http://code.google.com/p/protobuf/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package com.google.protobuf; + +import com.google.protobuf.AbstractMessageLite.Builder.LimitedInputStream; + +import java.io.IOException; +import java.io.InputStream; + +/** + * A partial implementation of the {@link Parser} interface which implements + * as many methods of that interface as possible in terms of other methods. + * + * Note: This class implements all the convenience methods in the + * {@link Parser} interface. See {@link Parser} for related javadocs. + * Subclasses need to implement + * {@link Parser#parsePartialFrom(CodedInputStream, ExtensionRegistryLite)} + * + * @author liujisi@google.com (Pherl Liu) + */ +public abstract class AbstractParser<MessageType extends MessageLite> + implements Parser<MessageType> { + /** + * Creates an UninitializedMessageException for MessageType. + */ + private UninitializedMessageException + newUninitializedMessageException(MessageType message) { + if (message instanceof AbstractMessageLite) { + return ((AbstractMessageLite) message).newUninitializedMessageException(); + } + return new UninitializedMessageException(message); + } + + /** + * Helper method to check if message is initialized. + * + * @throws InvalidProtocolBufferException if it is not initialized. + * @return The message to check. + */ + private MessageType checkMessageInitialized(MessageType message) + throws InvalidProtocolBufferException { + if (message != null && !message.isInitialized()) { + throw newUninitializedMessageException(message) + .asInvalidProtocolBufferException() + .setUnfinishedMessage(message); + } + return message; + } + + private static final ExtensionRegistryLite EMPTY_REGISTRY + = ExtensionRegistryLite.getEmptyRegistry(); + + public MessageType parsePartialFrom(CodedInputStream input) + throws InvalidProtocolBufferException { + return parsePartialFrom(input, EMPTY_REGISTRY); + } + + public MessageType parseFrom(CodedInputStream input, + ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException { + return checkMessageInitialized( + parsePartialFrom(input, extensionRegistry)); + } + + public MessageType parseFrom(CodedInputStream input) + throws InvalidProtocolBufferException { + return parseFrom(input, EMPTY_REGISTRY); + } + + public MessageType parsePartialFrom(ByteString data, + ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException { + MessageType message; + try { + CodedInputStream input = data.newCodedInput(); + message = parsePartialFrom(input, extensionRegistry); + try { + input.checkLastTagWas(0); + } catch (InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(message); + } + return message; + } catch (InvalidProtocolBufferException e) { + throw e; + } catch (IOException e) { + throw new RuntimeException( + "Reading from a ByteString threw an IOException (should " + + "never happen).", e); + } + } + + public MessageType parsePartialFrom(ByteString data) + throws InvalidProtocolBufferException { + return parsePartialFrom(data, EMPTY_REGISTRY); + } + + public MessageType parseFrom(ByteString data, + ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException { + return checkMessageInitialized(parsePartialFrom(data, extensionRegistry)); + } + + public MessageType parseFrom(ByteString data) + throws InvalidProtocolBufferException { + return parseFrom(data, EMPTY_REGISTRY); + } + + public MessageType parsePartialFrom(byte[] data, int off, int len, + ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException { + try { + CodedInputStream input = CodedInputStream.newInstance(data, off, len); + MessageType message = parsePartialFrom(input, extensionRegistry); + try { + input.checkLastTagWas(0); + } catch (InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(message); + } + return message; + } catch (InvalidProtocolBufferException e) { + throw e; + } catch (IOException e) { + throw new RuntimeException( + "Reading from a byte array threw an IOException (should " + + "never happen).", e); + } + } + + public MessageType parsePartialFrom(byte[] data, int off, int len) + throws InvalidProtocolBufferException { + return parsePartialFrom(data, off, len, EMPTY_REGISTRY); + } + + public MessageType parsePartialFrom(byte[] data, + ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException { + return parsePartialFrom(data, 0, data.length, extensionRegistry); + } + + public MessageType parsePartialFrom(byte[] data) + throws InvalidProtocolBufferException { + return parsePartialFrom(data, 0, data.length, EMPTY_REGISTRY); + } + + public MessageType parseFrom(byte[] data, int off, int len, + ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException { + return checkMessageInitialized( + parsePartialFrom(data, off, len, extensionRegistry)); + } + + public MessageType parseFrom(byte[] data, int off, int len) + throws InvalidProtocolBufferException { + return parseFrom(data, off, len, EMPTY_REGISTRY); + } + + public MessageType parseFrom(byte[] data, + ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException { + return parseFrom(data, 0, data.length, extensionRegistry); + } + + public MessageType parseFrom(byte[] data) + throws InvalidProtocolBufferException { + return parseFrom(data, EMPTY_REGISTRY); + } + + public MessageType parsePartialFrom(InputStream input, + ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException { + CodedInputStream codedInput = CodedInputStream.newInstance(input); + MessageType message = parsePartialFrom(codedInput, extensionRegistry); + try { + codedInput.checkLastTagWas(0); + } catch (InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(message); + } + return message; + } + + public MessageType parsePartialFrom(InputStream input) + throws InvalidProtocolBufferException { + return parsePartialFrom(input, EMPTY_REGISTRY); + } + + public MessageType parseFrom(InputStream input, + ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException { + return checkMessageInitialized( + parsePartialFrom(input, extensionRegistry)); + } + + public MessageType parseFrom(InputStream input) + throws InvalidProtocolBufferException { + return parseFrom(input, EMPTY_REGISTRY); + } + + public MessageType parsePartialDelimitedFrom( + InputStream input, + ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException { + int size; + try { + int firstByte = input.read(); + if (firstByte == -1) { + return null; + } + size = CodedInputStream.readRawVarint32(firstByte, input); + } catch (IOException e) { + throw new InvalidProtocolBufferException(e.getMessage()); + } + InputStream limitedInput = new LimitedInputStream(input, size); + return parsePartialFrom(limitedInput, extensionRegistry); + } + + public MessageType parsePartialDelimitedFrom(InputStream input) + throws InvalidProtocolBufferException { + return parsePartialDelimitedFrom(input, EMPTY_REGISTRY); + } + + public MessageType parseDelimitedFrom( + InputStream input, + ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException { + return checkMessageInitialized( + parsePartialDelimitedFrom(input, extensionRegistry)); + } + + public MessageType parseDelimitedFrom(InputStream input) + throws InvalidProtocolBufferException { + return parseDelimitedFrom(input, EMPTY_REGISTRY); + } +} diff --git a/java/src/main/java/com/google/protobuf/BoundedByteString.java b/java/src/main/java/com/google/protobuf/BoundedByteString.java new file mode 100644 index 00000000..cd4982c3 --- /dev/null +++ b/java/src/main/java/com/google/protobuf/BoundedByteString.java @@ -0,0 +1,163 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// http://code.google.com/p/protobuf/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package com.google.protobuf; + +import java.util.NoSuchElementException; + +/** + * This class is used to represent the substring of a {@link ByteString} over a + * single byte array. In terms of the public API of {@link ByteString}, you end + * up here by calling {@link ByteString#copyFrom(byte[])} followed by {@link + * ByteString#substring(int, int)}. + * + * <p>This class contains most of the overhead involved in creating a substring + * from a {@link LiteralByteString}. The overhead involves some range-checking + * and two extra fields. + * + * @author carlanton@google.com (Carl Haverl) + */ +class BoundedByteString extends LiteralByteString { + + private final int bytesOffset; + private final int bytesLength; + + /** + * Creates a {@code BoundedByteString} backed by the sub-range of given array, + * without copying. + * + * @param bytes array to wrap + * @param offset index to first byte to use in bytes + * @param length number of bytes to use from bytes + * @throws IllegalArgumentException if {@code offset < 0}, {@code length < 0}, + * or if {@code offset + length > + * bytes.length}. + */ + BoundedByteString(byte[] bytes, int offset, int length) { + super(bytes); + if (offset < 0) { + throw new IllegalArgumentException("Offset too small: " + offset); + } + if (length < 0) { + throw new IllegalArgumentException("Length too small: " + offset); + } + if ((long) offset + length > bytes.length) { + throw new IllegalArgumentException( + "Offset+Length too large: " + offset + "+" + length); + } + + this.bytesOffset = offset; + this.bytesLength = length; + } + + /** + * Gets the byte at the given index. + * Throws {@link ArrayIndexOutOfBoundsException} + * for backwards-compatibility reasons although it would more properly be + * {@link IndexOutOfBoundsException}. + * + * @param index index of byte + * @return the value + * @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size + */ + @Override + public byte byteAt(int index) { + // We must check the index ourselves as we cannot rely on Java array index + // checking for substrings. + if (index < 0) { + throw new ArrayIndexOutOfBoundsException("Index too small: " + index); + } + if (index >= size()) { + throw new ArrayIndexOutOfBoundsException( + "Index too large: " + index + ", " + size()); + } + + return bytes[bytesOffset + index]; + } + + @Override + public int size() { + return bytesLength; + } + + @Override + protected int getOffsetIntoBytes() { + return bytesOffset; + } + + // ================================================================= + // ByteString -> byte[] + + @Override + protected void copyToInternal(byte[] target, int sourceOffset, + int targetOffset, int numberToCopy) { + System.arraycopy(bytes, getOffsetIntoBytes() + sourceOffset, target, + targetOffset, numberToCopy); + } + + // ================================================================= + // ByteIterator + + @Override + public ByteIterator iterator() { + return new BoundedByteIterator(); + } + + private class BoundedByteIterator implements ByteIterator { + + private int position; + private final int limit; + + private BoundedByteIterator() { + position = getOffsetIntoBytes(); + limit = position + size(); + } + + public boolean hasNext() { + return (position < limit); + } + + public Byte next() { + // Boxing calls Byte.valueOf(byte), which does not instantiate. + return nextByte(); + } + + public byte nextByte() { + if (position >= limit) { + throw new NoSuchElementException(); + } + return bytes[position++]; + } + + public void remove() { + throw new UnsupportedOperationException(); + } + } +} diff --git a/java/src/main/java/com/google/protobuf/ByteString.java b/java/src/main/java/com/google/protobuf/ByteString.java index 91356357..1b18169e 100644 --- a/java/src/main/java/com/google/protobuf/ByteString.java +++ b/java/src/main/java/com/google/protobuf/ByteString.java @@ -30,140 +30,413 @@ package com.google.protobuf; -import java.io.InputStream; -import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; -import java.io.FilterOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; import java.io.UnsupportedEncodingException; import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Iterator; import java.util.List; +import java.util.NoSuchElementException; /** - * Immutable array of bytes. + * Immutable sequence of bytes. Substring is supported by sharing the reference + * to the immutable underlying bytes, as with {@link String}. Concatenation is + * likewise supported without copying (long strings) by building a tree of + * pieces in {@link RopeByteString}. + * <p> + * Like {@link String}, the contents of a {@link ByteString} can never be + * observed to change, not even in the presence of a data race or incorrect + * API usage in the client code. * * @author crazybob@google.com Bob Lee * @author kenton@google.com Kenton Varda + * @author carlanton@google.com Carl Haverl + * @author martinrb@google.com Martin Buchholz */ -public final class ByteString { - private final byte[] bytes; +public abstract class ByteString implements Iterable<Byte> { - private ByteString(final byte[] bytes) { - this.bytes = bytes; - } + /** + * When two strings to be concatenated have a combined length shorter than + * this, we just copy their bytes on {@link #concat(ByteString)}. + * The trade-off is copy size versus the overhead of creating tree nodes + * in {@link RopeByteString}. + */ + static final int CONCATENATE_BY_COPY_SIZE = 128; + + /** + * When copying an InputStream into a ByteString with .readFrom(), + * the chunks in the underlying rope start at 256 bytes, but double + * each iteration up to 8192 bytes. + */ + static final int MIN_READ_FROM_CHUNK_SIZE = 0x100; // 256b + static final int MAX_READ_FROM_CHUNK_SIZE = 0x2000; // 8k /** - * Gets the byte at the given index. + * Empty {@code ByteString}. + */ + public static final ByteString EMPTY = new LiteralByteString(new byte[0]); + + // This constructor is here to prevent subclassing outside of this package, + ByteString() {} + + /** + * Gets the byte at the given index. This method should be used only for + * random access to individual bytes. To access bytes sequentially, use the + * {@link ByteIterator} returned by {@link #iterator()}, and call {@link + * #substring(int, int)} first if necessary. * + * @param index index of byte + * @return the value * @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size */ - public byte byteAt(final int index) { - return bytes[index]; + public abstract byte byteAt(int index); + + /** + * Return a {@link ByteString.ByteIterator} over the bytes in the ByteString. + * To avoid auto-boxing, you may get the iterator manually and call + * {@link ByteIterator#nextByte()}. + * + * @return the iterator + */ + public abstract ByteIterator iterator(); + + /** + * This interface extends {@code Iterator<Byte>}, so that we can return an + * unboxed {@code byte}. + */ + public interface ByteIterator extends Iterator<Byte> { + /** + * An alternative to {@link Iterator#next()} that returns an + * unboxed primitive {@code byte}. + * + * @return the next {@code byte} in the iteration + * @throws NoSuchElementException if the iteration has no more elements + */ + byte nextByte(); } /** * Gets the number of bytes. + * + * @return size in bytes */ - public int size() { - return bytes.length; - } + public abstract int size(); /** * Returns {@code true} if the size is {@code 0}, {@code false} otherwise. + * + * @return true if this is zero bytes long */ public boolean isEmpty() { - return bytes.length == 0; + return size() == 0; } // ================================================================= - // byte[] -> ByteString + // ByteString -> substring /** - * Empty ByteString. + * Return the substring from {@code beginIndex}, inclusive, to the end of the + * string. + * + * @param beginIndex start at this index + * @return substring sharing underlying data + * @throws IndexOutOfBoundsException if {@code beginIndex < 0} or + * {@code beginIndex > size()}. */ - public static final ByteString EMPTY = new ByteString(new byte[0]); + public ByteString substring(int beginIndex) { + return substring(beginIndex, size()); + } + + /** + * Return the substring from {@code beginIndex}, inclusive, to {@code + * endIndex}, exclusive. + * + * @param beginIndex start at this index + * @param endIndex the last character is the one before this index + * @return substring sharing underlying data + * @throws IndexOutOfBoundsException if {@code beginIndex < 0}, + * {@code endIndex > size()}, or {@code beginIndex > endIndex}. + */ + public abstract ByteString substring(int beginIndex, int endIndex); + + /** + * Tests if this bytestring starts with the specified prefix. + * Similar to {@link String#startsWith(String)} + * + * @param prefix the prefix. + * @return <code>true</code> if the byte sequence represented by the + * argument is a prefix of the byte sequence represented by + * this string; <code>false</code> otherwise. + */ + public boolean startsWith(ByteString prefix) { + return size() >= prefix.size() && + substring(0, prefix.size()).equals(prefix); + } + + // ================================================================= + // byte[] -> ByteString /** * Copies the given bytes into a {@code ByteString}. + * + * @param bytes source array + * @param offset offset in source array + * @param size number of bytes to copy + * @return new {@code ByteString} */ - public static ByteString copyFrom(final byte[] bytes, final int offset, - final int size) { - final byte[] copy = new byte[size]; + public static ByteString copyFrom(byte[] bytes, int offset, int size) { + byte[] copy = new byte[size]; System.arraycopy(bytes, offset, copy, 0, size); - return new ByteString(copy); + return new LiteralByteString(copy); } /** * Copies the given bytes into a {@code ByteString}. + * + * @param bytes to copy + * @return new {@code ByteString} */ - public static ByteString copyFrom(final byte[] bytes) { + public static ByteString copyFrom(byte[] bytes) { return copyFrom(bytes, 0, bytes.length); } /** - * Copies {@code size} bytes from a {@code java.nio.ByteBuffer} into + * Copies the next {@code size} bytes from a {@code java.nio.ByteBuffer} into * a {@code ByteString}. + * + * @param bytes source buffer + * @param size number of bytes to copy + * @return new {@code ByteString} */ - public static ByteString copyFrom(final ByteBuffer bytes, final int size) { - final byte[] copy = new byte[size]; + public static ByteString copyFrom(ByteBuffer bytes, int size) { + byte[] copy = new byte[size]; bytes.get(copy); - return new ByteString(copy); + return new LiteralByteString(copy); } /** * Copies the remaining bytes from a {@code java.nio.ByteBuffer} into * a {@code ByteString}. + * + * @param bytes sourceBuffer + * @return new {@code ByteString} */ - public static ByteString copyFrom(final ByteBuffer bytes) { + public static ByteString copyFrom(ByteBuffer bytes) { return copyFrom(bytes, bytes.remaining()); } /** * Encodes {@code text} into a sequence of bytes using the named charset * and returns the result as a {@code ByteString}. + * + * @param text source string + * @param charsetName encoding to use + * @return new {@code ByteString} + * @throws UnsupportedEncodingException if the encoding isn't found */ - public static ByteString copyFrom(final String text, final String charsetName) + public static ByteString copyFrom(String text, String charsetName) throws UnsupportedEncodingException { - return new ByteString(text.getBytes(charsetName)); + return new LiteralByteString(text.getBytes(charsetName)); } /** * Encodes {@code text} into a sequence of UTF-8 bytes and returns the * result as a {@code ByteString}. + * + * @param text source string + * @return new {@code ByteString} */ - public static ByteString copyFromUtf8(final String text) { + public static ByteString copyFromUtf8(String text) { try { - return new ByteString(text.getBytes("UTF-8")); + return new LiteralByteString(text.getBytes("UTF-8")); } catch (UnsupportedEncodingException e) { throw new RuntimeException("UTF-8 not supported?", e); } } + // ================================================================= + // InputStream -> ByteString + /** - * Concatenates all byte strings in the list and returns the result. + * Completely reads the given stream's bytes into a + * {@code ByteString}, blocking if necessary until all bytes are + * read through to the end of the stream. + * + * <b>Performance notes:</b> The returned {@code ByteString} is an + * immutable tree of byte arrays ("chunks") of the stream data. The + * first chunk is small, with subsequent chunks each being double + * the size, up to 8K. If the caller knows the precise length of + * the stream and wishes to avoid all unnecessary copies and + * allocations, consider using the two-argument version of this + * method, below. + * + * @param streamToDrain The source stream, which is read completely + * but not closed. + * @return A new {@code ByteString} which is made up of chunks of + * various sizes, depending on the behavior of the underlying + * stream. + * @throws IOException IOException is thrown if there is a problem + * reading the underlying stream. + */ + public static ByteString readFrom(InputStream streamToDrain) + throws IOException { + return readFrom( + streamToDrain, MIN_READ_FROM_CHUNK_SIZE, MAX_READ_FROM_CHUNK_SIZE); + } + + /** + * Completely reads the given stream's bytes into a + * {@code ByteString}, blocking if necessary until all bytes are + * read through to the end of the stream. + * + * <b>Performance notes:</b> The returned {@code ByteString} is an + * immutable tree of byte arrays ("chunks") of the stream data. The + * chunkSize parameter sets the size of these byte arrays. In + * particular, if the chunkSize is precisely the same as the length + * of the stream, unnecessary allocations and copies will be + * avoided. Otherwise, the chunks will be of the given size, except + * for the last chunk, which will be resized (via a reallocation and + * copy) to contain the remainder of the stream. + * + * @param streamToDrain The source stream, which is read completely + * but not closed. + * @param chunkSize The size of the chunks in which to read the + * stream. + * @return A new {@code ByteString} which is made up of chunks of + * the given size. + * @throws IOException IOException is thrown if there is a problem + * reading the underlying stream. + */ + public static ByteString readFrom(InputStream streamToDrain, int chunkSize) + throws IOException { + return readFrom(streamToDrain, chunkSize, chunkSize); + } + + // Helper method that takes the chunk size range as a parameter. + public static ByteString readFrom(InputStream streamToDrain, int minChunkSize, + int maxChunkSize) throws IOException { + Collection<ByteString> results = new ArrayList<ByteString>(); + + // copy the inbound bytes into a list of chunks; the chunk size + // grows exponentially to support both short and long streams. + int chunkSize = minChunkSize; + while (true) { + ByteString chunk = readChunk(streamToDrain, chunkSize); + if (chunk == null) { + break; + } + results.add(chunk); + chunkSize = Math.min(chunkSize * 2, maxChunkSize); + } + + return ByteString.copyFrom(results); + } + + /** + * Blocks until a chunk of the given size can be made from the + * stream, or EOF is reached. Calls read() repeatedly in case the + * given stream implementation doesn't completely fill the given + * buffer in one read() call. + * + * @return A chunk of the desired size, or else a chunk as large as + * was available when end of stream was reached. Returns null if the + * given stream had no more data in it. + */ + private static ByteString readChunk(InputStream in, final int chunkSize) + throws IOException { + final byte[] buf = new byte[chunkSize]; + int bytesRead = 0; + while (bytesRead < chunkSize) { + final int count = in.read(buf, bytesRead, chunkSize - bytesRead); + if (count == -1) { + break; + } + bytesRead += count; + } + + if (bytesRead == 0) { + return null; + } else { + return ByteString.copyFrom(buf, 0, bytesRead); + } + } + + // ================================================================= + // Multiple ByteStrings -> One ByteString + + /** + * Concatenate the given {@code ByteString} to this one. Short concatenations, + * of total size smaller than {@link ByteString#CONCATENATE_BY_COPY_SIZE}, are + * produced by copying the underlying bytes (as per Rope.java, <a + * href="http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf"> + * BAP95 </a>. In general, the concatenate involves no copying. + * + * @param other string to concatenate + * @return a new {@code ByteString} instance + */ + public ByteString concat(ByteString other) { + int thisSize = size(); + int otherSize = other.size(); + if ((long) thisSize + otherSize >= Integer.MAX_VALUE) { + throw new IllegalArgumentException("ByteString would be too long: " + + thisSize + "+" + otherSize); + } + + return RopeByteString.concatenate(this, other); + } + + /** + * Concatenates all byte strings in the iterable and returns the result. + * This is designed to run in O(list size), not O(total bytes). * * <p>The returned {@code ByteString} is not necessarily a unique object. * If the list is empty, the returned object is the singleton empty * {@code ByteString}. If the list has only one element, that * {@code ByteString} will be returned without copying. + * + * @param byteStrings strings to be concatenated + * @return new {@code ByteString} */ - public static ByteString copyFrom(List<ByteString> list) { - if (list.size() == 0) { - return EMPTY; - } else if (list.size() == 1) { - return list.get(0); + public static ByteString copyFrom(Iterable<ByteString> byteStrings) { + Collection<ByteString> collection; + if (!(byteStrings instanceof Collection)) { + collection = new ArrayList<ByteString>(); + for (ByteString byteString : byteStrings) { + collection.add(byteString); + } + } else { + collection = (Collection<ByteString>) byteStrings; } - - int size = 0; - for (ByteString str : list) { - size += str.size(); + ByteString result; + if (collection.isEmpty()) { + result = EMPTY; + } else { + result = balancedConcat(collection.iterator(), collection.size()); } - byte[] bytes = new byte[size]; - int pos = 0; - for (ByteString str : list) { - System.arraycopy(str.bytes, 0, bytes, pos, str.size()); - pos += str.size(); + return result; + } + + // Internal function used by copyFrom(Iterable<ByteString>). + // Create a balanced concatenation of the next "length" elements from the + // iterable. + private static ByteString balancedConcat(Iterator<ByteString> iterator, + int length) { + assert length >= 1; + ByteString result; + if (length == 1) { + result = iterator.next(); + } else { + int halfLength = length >>> 1; + ByteString left = balancedConcat(iterator, halfLength); + ByteString right = balancedConcat(iterator, length - halfLength); + result = left.concat(right); } - return new ByteString(bytes); + return result; } // ================================================================= @@ -174,206 +447,446 @@ public final class ByteString { * * @param target buffer to copy into * @param offset in the target buffer + * @throws IndexOutOfBoundsException if the offset is negative or too large */ - public void copyTo(final byte[] target, final int offset) { - System.arraycopy(bytes, 0, target, offset, bytes.length); + public void copyTo(byte[] target, int offset) { + copyTo(target, 0, offset, size()); } /** * Copies bytes into a buffer. * - * @param target buffer to copy into + * @param target buffer to copy into * @param sourceOffset offset within these bytes * @param targetOffset offset within the target buffer - * @param size number of bytes to copy + * @param numberToCopy number of bytes to copy + * @throws IndexOutOfBoundsException if an offset or size is negative or too + * large */ - public void copyTo(final byte[] target, final int sourceOffset, - final int targetOffset, - final int size) { - System.arraycopy(bytes, sourceOffset, target, targetOffset, size); + public void copyTo(byte[] target, int sourceOffset, int targetOffset, + int numberToCopy) { + if (sourceOffset < 0) { + throw new IndexOutOfBoundsException("Source offset < 0: " + sourceOffset); + } + if (targetOffset < 0) { + throw new IndexOutOfBoundsException("Target offset < 0: " + targetOffset); + } + if (numberToCopy < 0) { + throw new IndexOutOfBoundsException("Length < 0: " + numberToCopy); + } + if (sourceOffset + numberToCopy > size()) { + throw new IndexOutOfBoundsException( + "Source end offset < 0: " + (sourceOffset + numberToCopy)); + } + if (targetOffset + numberToCopy > target.length) { + throw new IndexOutOfBoundsException( + "Target end offset < 0: " + (targetOffset + numberToCopy)); + } + if (numberToCopy > 0) { + copyToInternal(target, sourceOffset, targetOffset, numberToCopy); + } } /** + * Internal (package private) implementation of + * @link{#copyTo(byte[],int,int,int}. + * It assumes that all error checking has already been performed and that + * @code{numberToCopy > 0}. + */ + protected abstract void copyToInternal(byte[] target, int sourceOffset, + int targetOffset, int numberToCopy); + + /** * Copies bytes into a ByteBuffer. * * @param target ByteBuffer to copy into. - * @throws ReadOnlyBufferException if the {@code target} is read-only - * @throws BufferOverflowException if the {@code target}'s remaining() - * space is not large enough to hold the data. + * @throws java.nio.ReadOnlyBufferException if the {@code target} is read-only + * @throws java.nio.BufferOverflowException if the {@code target}'s + * remaining() space is not large enough to hold the data. */ - public void copyTo(ByteBuffer target) { - target.put(bytes, 0, bytes.length); - } + public abstract void copyTo(ByteBuffer target); /** * Copies bytes to a {@code byte[]}. + * + * @return copied bytes */ public byte[] toByteArray() { - final int size = bytes.length; - final byte[] copy = new byte[size]; - System.arraycopy(bytes, 0, copy, 0, size); - return copy; + int size = size(); + byte[] result = new byte[size]; + copyToInternal(result, 0, 0, size); + return result; } /** - * Constructs a new read-only {@code java.nio.ByteBuffer} with the - * same backing byte array. + * Writes the complete contents of this byte string to + * the specified output stream argument. + * + * @param out the output stream to which to write the data. + * @throws IOException if an I/O error occurs. */ - public ByteBuffer asReadOnlyByteBuffer() { - final ByteBuffer byteBuffer = ByteBuffer.wrap(bytes); - return byteBuffer.asReadOnlyBuffer(); - } + public abstract void writeTo(OutputStream out) throws IOException; + + /** + * Constructs a read-only {@code java.nio.ByteBuffer} whose content + * is equal to the contents of this byte string. + * The result uses the same backing array as the byte string, if possible. + * + * @return wrapped bytes + */ + public abstract ByteBuffer asReadOnlyByteBuffer(); + + /** + * Constructs a list of read-only {@code java.nio.ByteBuffer} objects + * such that the concatenation of their contents is equal to the contents + * of this byte string. The result uses the same backing arrays as the + * byte string. + * <p> + * By returning a list, implementations of this method may be able to avoid + * copying even when there are multiple backing arrays. + * + * @return a list of wrapped bytes + */ + public abstract List<ByteBuffer> asReadOnlyByteBufferList(); /** * Constructs a new {@code String} by decoding the bytes using the * specified charset. + * + * @param charsetName encode using this charset + * @return new string + * @throws UnsupportedEncodingException if charset isn't recognized */ - public String toString(final String charsetName) - throws UnsupportedEncodingException { - return new String(bytes, charsetName); - } + public abstract String toString(String charsetName) + throws UnsupportedEncodingException; + + // ================================================================= + // UTF-8 decoding /** * Constructs a new {@code String} by decoding the bytes as UTF-8. + * + * @return new string using UTF-8 encoding */ public String toStringUtf8() { try { - return new String(bytes, "UTF-8"); + return toString("UTF-8"); } catch (UnsupportedEncodingException e) { throw new RuntimeException("UTF-8 not supported?", e); } } + /** + * Tells whether this {@code ByteString} represents a well-formed UTF-8 + * byte sequence, such that the original bytes can be converted to a + * String object and then round tripped back to bytes without loss. + * + * <p>More precisely, returns {@code true} whenever: <pre> {@code + * Arrays.equals(byteString.toByteArray(), + * new String(byteString.toByteArray(), "UTF-8").getBytes("UTF-8")) + * }</pre> + * + * <p>This method returns {@code false} for "overlong" byte sequences, + * as well as for 3-byte sequences that would map to a surrogate + * character, in accordance with the restricted definition of UTF-8 + * introduced in Unicode 3.1. Note that the UTF-8 decoder included in + * Oracle's JDK has been modified to also reject "overlong" byte + * sequences, but (as of 2011) still accepts 3-byte surrogate + * character byte sequences. + * + * <p>See the Unicode Standard,</br> + * Table 3-6. <em>UTF-8 Bit Distribution</em>,</br> + * Table 3-7. <em>Well Formed UTF-8 Byte Sequences</em>. + * + * @return whether the bytes in this {@code ByteString} are a + * well-formed UTF-8 byte sequence + */ + public abstract boolean isValidUtf8(); + + /** + * Tells whether the given byte sequence is a well-formed, malformed, or + * incomplete UTF-8 byte sequence. This method accepts and returns a partial + * state result, allowing the bytes for a complete UTF-8 byte sequence to be + * composed from multiple {@code ByteString} segments. + * + * @param state either {@code 0} (if this is the initial decoding operation) + * or the value returned from a call to a partial decoding method for the + * previous bytes + * @param offset offset of the first byte to check + * @param length number of bytes to check + * + * @return {@code -1} if the partial byte sequence is definitely malformed, + * {@code 0} if it is well-formed (no additional input needed), or, if the + * byte sequence is "incomplete", i.e. apparently terminated in the middle of + * a character, an opaque integer "state" value containing enough information + * to decode the character when passed to a subsequent invocation of a + * partial decoding method. + */ + protected abstract int partialIsValidUtf8(int state, int offset, int length); + // ================================================================= // equals() and hashCode() @Override - public boolean equals(final Object o) { - if (o == this) { - return true; - } - - if (!(o instanceof ByteString)) { - return false; - } - - final ByteString other = (ByteString) o; - final int size = bytes.length; - if (size != other.bytes.length) { - return false; - } - - final byte[] thisBytes = bytes; - final byte[] otherBytes = other.bytes; - for (int i = 0; i < size; i++) { - if (thisBytes[i] != otherBytes[i]) { - return false; - } - } - - return true; - } - - private volatile int hash = 0; + public abstract boolean equals(Object o); + /** + * Return a non-zero hashCode depending only on the sequence of bytes + * in this ByteString. + * + * @return hashCode value for this object + */ @Override - public int hashCode() { - int h = hash; - - if (h == 0) { - final byte[] thisBytes = bytes; - final int size = bytes.length; - - h = size; - for (int i = 0; i < size; i++) { - h = h * 31 + thisBytes[i]; - } - if (h == 0) { - h = 1; - } - - hash = h; - } - - return h; - } + public abstract int hashCode(); // ================================================================= // Input stream /** * Creates an {@code InputStream} which can be used to read the bytes. + * <p> + * The {@link InputStream} returned by this method is guaranteed to be + * completely non-blocking. The method {@link InputStream#available()} + * returns the number of bytes remaining in the stream. The methods + * {@link InputStream#read(byte[]), {@link InputStream#read(byte[],int,int)} + * and {@link InputStream#skip(long)} will read/skip as many bytes as are + * available. + * <p> + * The methods in the returned {@link InputStream} might <b>not</b> be + * thread safe. + * + * @return an input stream that returns the bytes of this byte string. */ - public InputStream newInput() { - return new ByteArrayInputStream(bytes); - } + public abstract InputStream newInput(); /** * Creates a {@link CodedInputStream} which can be used to read the bytes. - * Using this is more efficient than creating a {@link CodedInputStream} - * wrapping the result of {@link #newInput()}. + * Using this is often more efficient than creating a {@link CodedInputStream} + * that wraps the result of {@link #newInput()}. + * + * @return stream based on wrapped data */ - public CodedInputStream newCodedInput() { - // We trust CodedInputStream not to modify the bytes, or to give anyone - // else access to them. - return CodedInputStream.newInstance(bytes); - } + public abstract CodedInputStream newCodedInput(); // ================================================================= // Output stream /** - * Creates a new {@link Output} with the given initial capacity. + * Creates a new {@link Output} with the given initial capacity. Call {@link + * Output#toByteString()} to create the {@code ByteString} instance. + * <p> + * A {@link ByteString.Output} offers the same functionality as a + * {@link ByteArrayOutputStream}, except that it returns a {@link ByteString} + * rather than a {@code byte} array. + * + * @param initialCapacity estimate of number of bytes to be written + * @return {@code OutputStream} for building a {@code ByteString} */ - public static Output newOutput(final int initialCapacity) { - return new Output(new ByteArrayOutputStream(initialCapacity)); + public static Output newOutput(int initialCapacity) { + return new Output(initialCapacity); } /** - * Creates a new {@link Output}. + * Creates a new {@link Output}. Call {@link Output#toByteString()} to create + * the {@code ByteString} instance. + * <p> + * A {@link ByteString.Output} offers the same functionality as a + * {@link ByteArrayOutputStream}, except that it returns a {@link ByteString} + * rather than a {@code byte array}. + * + * @return {@code OutputStream} for building a {@code ByteString} */ public static Output newOutput() { - return newOutput(32); + return new Output(CONCATENATE_BY_COPY_SIZE); } /** * Outputs to a {@code ByteString} instance. Call {@link #toByteString()} to * create the {@code ByteString} instance. */ - public static final class Output extends FilterOutputStream { - private final ByteArrayOutputStream bout; + public static final class Output extends OutputStream { + // Implementation note. + // The public methods of this class must be synchronized. ByteStrings + // are guaranteed to be immutable. Without some sort of locking, it could + // be possible for one thread to call toByteSring(), while another thread + // is still modifying the underlying byte array. + + private static final byte[] EMPTY_BYTE_ARRAY = new byte[0]; + // argument passed by user, indicating initial capacity. + private final int initialCapacity; + // ByteStrings to be concatenated to create the result + private final ArrayList<ByteString> flushedBuffers; + // Total number of bytes in the ByteStrings of flushedBuffers + private int flushedBuffersTotalBytes; + // Current buffer to which we are writing + private byte[] buffer; + // Location in buffer[] to which we write the next byte. + private int bufferPos; /** - * Constructs a new output with the given initial capacity. + * Creates a new ByteString output stream with the specified + * initial capacity. + * + * @param initialCapacity the initial capacity of the output stream. */ - private Output(final ByteArrayOutputStream bout) { - super(bout); - this.bout = bout; + Output(int initialCapacity) { + if (initialCapacity < 0) { + throw new IllegalArgumentException("Buffer size < 0"); + } + this.initialCapacity = initialCapacity; + this.flushedBuffers = new ArrayList<ByteString>(); + this.buffer = new byte[initialCapacity]; + } + + @Override + public synchronized void write(int b) { + if (bufferPos == buffer.length) { + flushFullBuffer(1); + } + buffer[bufferPos++] = (byte)b; + } + + @Override + public synchronized void write(byte[] b, int offset, int length) { + if (length <= buffer.length - bufferPos) { + // The bytes can fit into the current buffer. + System.arraycopy(b, offset, buffer, bufferPos, length); + bufferPos += length; + } else { + // Use up the current buffer + int copySize = buffer.length - bufferPos; + System.arraycopy(b, offset, buffer, bufferPos, copySize); + offset += copySize; + length -= copySize; + // Flush the buffer, and get a new buffer at least big enough to cover + // what we still need to output + flushFullBuffer(length); + System.arraycopy(b, offset, buffer, 0 /* count */, length); + bufferPos = length; + } + } + + /** + * Creates a byte string. Its size is the current size of this output + * stream and its output has been copied to it. + * + * @return the current contents of this output stream, as a byte string. + */ + public synchronized ByteString toByteString() { + flushLastBuffer(); + return ByteString.copyFrom(flushedBuffers); + } + + /** + * Writes the complete contents of this byte array output stream to + * the specified output stream argument. + * + * @param out the output stream to which to write the data. + * @throws IOException if an I/O error occurs. + */ + public void writeTo(OutputStream out) throws IOException { + ByteString[] cachedFlushBuffers; + byte[] cachedBuffer; + int cachedBufferPos; + synchronized (this) { + // Copy the information we need into local variables so as to hold + // the lock for as short a time as possible. + cachedFlushBuffers = + flushedBuffers.toArray(new ByteString[flushedBuffers.size()]); + cachedBuffer = buffer; + cachedBufferPos = bufferPos; + } + for (ByteString byteString : cachedFlushBuffers) { + byteString.writeTo(out); + } + + out.write(Arrays.copyOf(cachedBuffer, cachedBufferPos)); + } + + /** + * Returns the current size of the output stream. + * + * @return the current size of the output stream + */ + public synchronized int size() { + return flushedBuffersTotalBytes + bufferPos; + } + + /** + * Resets this stream, so that all currently accumulated output in the + * output stream is discarded. The output stream can be used again, + * reusing the already allocated buffer space. + */ + public synchronized void reset() { + flushedBuffers.clear(); + flushedBuffersTotalBytes = 0; + bufferPos = 0; + } + + @Override + public String toString() { + return String.format("<ByteString.Output@%s size=%d>", + Integer.toHexString(System.identityHashCode(this)), size()); } /** - * Creates a {@code ByteString} instance from this {@code Output}. + * Internal function used by writers. The current buffer is full, and the + * writer needs a new buffer whose size is at least the specified minimum + * size. */ - public ByteString toByteString() { - final byte[] byteArray = bout.toByteArray(); - return new ByteString(byteArray); + private void flushFullBuffer(int minSize) { + flushedBuffers.add(new LiteralByteString(buffer)); + flushedBuffersTotalBytes += buffer.length; + // We want to increase our total capacity by 50%, but as a minimum, + // the new buffer should also at least be >= minSize and + // >= initial Capacity. + int newSize = Math.max(initialCapacity, + Math.max(minSize, flushedBuffersTotalBytes >>> 1)); + buffer = new byte[newSize]; + bufferPos = 0; + } + + /** + * Internal function used by {@link #toByteString()}. The current buffer may + * or may not be full, but it needs to be flushed. + */ + private void flushLastBuffer() { + if (bufferPos < buffer.length) { + if (bufferPos > 0) { + byte[] bufferCopy = Arrays.copyOf(buffer, bufferPos); + flushedBuffers.add(new LiteralByteString(bufferCopy)); + } + // We reuse this buffer for further writes. + } else { + // Buffer is completely full. Huzzah. + flushedBuffers.add(new LiteralByteString(buffer)); + // 99% of the time, we're not going to use this OutputStream again. + // We set buffer to an empty byte stream so that we're handling this + // case without wasting space. In the rare case that more writes + // *do* occur, this empty buffer will be flushed and an appropriately + // sized new buffer will be created. + buffer = EMPTY_BYTE_ARRAY; + } + flushedBuffersTotalBytes += bufferPos; + bufferPos = 0; } } /** - * Constructs a new ByteString builder, which allows you to efficiently - * construct a {@code ByteString} by writing to a {@link CodedOutputStream}. - * Using this is much more efficient than calling {@code newOutput()} and - * wrapping that in a {@code CodedOutputStream}. + * Constructs a new {@code ByteString} builder, which allows you to + * efficiently construct a {@code ByteString} by writing to a {@link + * CodedOutputStream}. Using this is much more efficient than calling {@code + * newOutput()} and wrapping that in a {@code CodedOutputStream}. * * <p>This is package-private because it's a somewhat confusing interface. * Users can call {@link Message#toByteString()} instead of calling this * directly. * - * @param size The target byte size of the {@code ByteString}. You must - * write exactly this many bytes before building the result. + * @param size The target byte size of the {@code ByteString}. You must write + * exactly this many bytes before building the result. + * @return the builder */ - static CodedBuilder newCodedBuilder(final int size) { + static CodedBuilder newCodedBuilder(int size) { return new CodedBuilder(size); } @@ -382,7 +895,7 @@ public final class ByteString { private final CodedOutputStream output; private final byte[] buffer; - private CodedBuilder(final int size) { + private CodedBuilder(int size) { buffer = new byte[size]; output = CodedOutputStream.newInstance(buffer); } @@ -393,11 +906,57 @@ public final class ByteString { // We can be confident that the CodedOutputStream will not modify the // underlying bytes anymore because it already wrote all of them. So, // no need to make a copy. - return new ByteString(buffer); + return new LiteralByteString(buffer); } public CodedOutputStream getCodedOutput() { return output; } } + + // ================================================================= + // Methods {@link RopeByteString} needs on instances, which aren't part of the + // public API. + + /** + * Return the depth of the tree representing this {@code ByteString}, if any, + * whose root is this node. If this is a leaf node, return 0. + * + * @return tree depth or zero + */ + protected abstract int getTreeDepth(); + + /** + * Return {@code true} if this ByteString is literal (a leaf node) or a + * flat-enough tree in the sense of {@link RopeByteString}. + * + * @return true if the tree is flat enough + */ + protected abstract boolean isBalanced(); + + /** + * Return the cached hash code if available. + * + * @return value of cached hash code or 0 if not computed yet + */ + protected abstract int peekCachedHashCode(); + + /** + * Compute the hash across the value bytes starting with the given hash, and + * return the result. This is used to compute the hash across strings + * represented as a set of pieces by allowing the hash computation to be + * continued from piece to piece. + * + * @param h starting hash value + * @param offset offset into this value to start looking at data values + * @param length number of data values to include in the hash computation + * @return ending hash value + */ + protected abstract int partialHash(int h, int offset, int length); + + @Override + public String toString() { + return String.format("<ByteString@%s size=%d>", + Integer.toHexString(System.identityHashCode(this)), size()); + } } diff --git a/java/src/main/java/com/google/protobuf/CodedInputStream.java b/java/src/main/java/com/google/protobuf/CodedInputStream.java index b3e08555..33417a7f 100644 --- a/java/src/main/java/com/google/protobuf/CodedInputStream.java +++ b/java/src/main/java/com/google/protobuf/CodedInputStream.java @@ -243,6 +243,23 @@ public final class CodedInputStream { --recursionDepth; } + /** Read a {@code group} field value from the stream. */ + public <T extends MessageLite> T readGroup( + final int fieldNumber, + final Parser<T> parser, + final ExtensionRegistryLite extensionRegistry) + throws IOException { + if (recursionDepth >= recursionLimit) { + throw InvalidProtocolBufferException.recursionLimitExceeded(); + } + ++recursionDepth; + T result = parser.parsePartialFrom(this, extensionRegistry); + checkLastTagWas( + WireFormat.makeTag(fieldNumber, WireFormat.WIRETYPE_END_GROUP)); + --recursionDepth; + return result; + } + /** * Reads a {@code group} field value from the stream and merges it into the * given {@link UnknownFieldSet}. @@ -278,6 +295,24 @@ public final class CodedInputStream { popLimit(oldLimit); } + /** Read an embedded message field value from the stream. */ + public <T extends MessageLite> T readMessage( + final Parser<T> parser, + final ExtensionRegistryLite extensionRegistry) + throws IOException { + int length = readRawVarint32(); + if (recursionDepth >= recursionLimit) { + throw InvalidProtocolBufferException.recursionLimitExceeded(); + } + final int oldLimit = pushLimit(length); + ++recursionDepth; + T result = parser.parsePartialFrom(this, extensionRegistry); + checkLastTagWas(0); + --recursionDepth; + popLimit(oldLimit); + return result; + } + /** Read a {@code bytes} field value from the stream. */ public ByteString readBytes() throws IOException { final int size = readRawVarint32(); @@ -601,7 +636,7 @@ public final class CodedInputStream { * refreshing its buffer. If you need to prevent reading past a certain * point in the underlying {@code InputStream} (e.g. because you expect it to * contain more data after the end of the message which you need to handle - * differently) then you must place a wrapper around you {@code InputStream} + * differently) then you must place a wrapper around your {@code InputStream} * which limits the amount of data that can be read from it. * * @return the old limit. @@ -676,7 +711,7 @@ public final class CodedInputStream { /** * Called with {@code this.buffer} is empty to read more bytes from the - * input. If {@code mustSucceed} is true, refillBuffer() gurantees that + * input. If {@code mustSucceed} is true, refillBuffer() guarantees that * either there will be at least one byte in the buffer when it returns * or it will throw an exception. If {@code mustSucceed} is false, * refillBuffer() returns false if no more bytes were available. @@ -879,7 +914,7 @@ public final class CodedInputStream { refillBuffer(true); } - bufferPos = size - pos; + bufferPos = size - pos; } } } diff --git a/java/src/main/java/com/google/protobuf/CodedOutputStream.java b/java/src/main/java/com/google/protobuf/CodedOutputStream.java index ac5f2d30..ca24638d 100644 --- a/java/src/main/java/com/google/protobuf/CodedOutputStream.java +++ b/java/src/main/java/com/google/protobuf/CodedOutputStream.java @@ -30,10 +30,10 @@ package com.google.protobuf; -import java.io.OutputStream; import java.io.IOException; -import java.io.UnsupportedEncodingException; import java.io.InputStream; +import java.io.OutputStream; +import java.io.UnsupportedEncodingException; /** * Encodes and writes protocol message fields. @@ -540,6 +540,15 @@ public final class CodedOutputStream { } /** + * Compute the number of bytes that would be needed to encode an + * embedded message in lazy field, including tag. + */ + public static int computeLazyFieldSize(final int fieldNumber, + final LazyField value) { + return computeTagSize(fieldNumber) + computeLazyFieldSizeNoTag(value); + } + + /** * Compute the number of bytes that would be needed to encode a * {@code uint32} field, including tag. */ @@ -614,6 +623,18 @@ public final class CodedOutputStream { computeBytesSize(WireFormat.MESSAGE_SET_MESSAGE, value); } + /** + * Compute the number of bytes that would be needed to encode an + * lazily parsed MessageSet extension field to the stream. For + * historical reasons, the wire format differs from normal fields. + */ + public static int computeLazyFieldMessageSetExtensionSize( + final int fieldNumber, final LazyField value) { + return computeTagSize(WireFormat.MESSAGE_SET_ITEM) * 2 + + computeUInt32Size(WireFormat.MESSAGE_SET_TYPE_ID, fieldNumber) + + computeLazyFieldSize(WireFormat.MESSAGE_SET_MESSAGE, value); + } + // ----------------------------------------------------------------- /** @@ -730,6 +751,15 @@ public final class CodedOutputStream { } /** + * Compute the number of bytes that would be needed to encode an embedded + * message stored in lazy field. + */ + public static int computeLazyFieldSizeNoTag(final LazyField value) { + final int size = value.getSerializedSize(); + return computeRawVarint32Size(size) + size; + } + + /** * Compute the number of bytes that would be needed to encode a * {@code bytes} field. */ diff --git a/java/src/main/java/com/google/protobuf/Descriptors.java b/java/src/main/java/com/google/protobuf/Descriptors.java index 2ee84594..a4913053 100644 --- a/java/src/main/java/com/google/protobuf/Descriptors.java +++ b/java/src/main/java/com/google/protobuf/Descriptors.java @@ -35,8 +35,10 @@ import com.google.protobuf.DescriptorProtos.*; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.io.UnsupportedEncodingException; /** @@ -106,6 +108,11 @@ public final class Descriptors { return Collections.unmodifiableList(Arrays.asList(dependencies)); } + /** Get a list of this file's public dependencies (public imports). */ + public List<FileDescriptor> getPublicDependencies() { + return Collections.unmodifiableList(Arrays.asList(publicDependencies)); + } + /** * Find a message type in the file by name. Does not find nested types. * @@ -216,7 +223,7 @@ public final class Descriptors { public static FileDescriptor buildFrom(final FileDescriptorProto proto, final FileDescriptor[] dependencies) throws DescriptorValidationException { - // Building decsriptors involves two steps: translating and linking. + // Building descriptors involves two steps: translating and linking. // In the translation step (implemented by FileDescriptor's // constructor), we build an object tree mirroring the // FileDescriptorProto's tree and put all of the descriptors into the @@ -317,12 +324,12 @@ public final class Descriptors { * {@link FileDescriptor#internalBuildGeneratedFileFrom}, the caller * provides a callback implementing this interface. The callback is called * after the FileDescriptor has been constructed, in order to assign all - * the global variales defined in the generated code which point at parts + * the global variables defined in the generated code which point at parts * of the FileDescriptor. The callback returns an ExtensionRegistry which * contains any extensions which might be used in the descriptor -- that * is, extensions of the various "Options" messages defined in * descriptor.proto. The callback may also return null to indicate that - * no extensions are used in the decsriptor. + * no extensions are used in the descriptor. */ public interface InternalDescriptorAssigner { ExtensionRegistry assignDescriptors(FileDescriptor root); @@ -334,6 +341,7 @@ public final class Descriptors { private final ServiceDescriptor[] services; private final FieldDescriptor[] extensions; private final FileDescriptor[] dependencies; + private final FileDescriptor[] publicDependencies; private final DescriptorPool pool; private FileDescriptor(final FileDescriptorProto proto, @@ -343,6 +351,17 @@ public final class Descriptors { this.pool = pool; this.proto = proto; this.dependencies = dependencies.clone(); + this.publicDependencies = + new FileDescriptor[proto.getPublicDependencyCount()]; + for (int i = 0; i < proto.getPublicDependencyCount(); i++) { + int index = proto.getPublicDependency(i); + if (index < 0 || index >= this.dependencies.length) { + throw new DescriptorValidationException(this, + "Invalid public dependency index."); + } + this.publicDependencies[i] = + this.dependencies[proto.getPublicDependency(i)]; + } pool.addPackage(getPackage(), this); @@ -390,7 +409,7 @@ public final class Descriptors { * in the original. This method is needed for bootstrapping when a file * defines custom options. The options may be defined in the file itself, * so we can't actually parse them until we've constructed the descriptors, - * but to construct the decsriptors we have to have parsed the descriptor + * but to construct the descriptors we have to have parsed the descriptor * protos. So, we have to parse the descriptor protos a second time after * constructing the descriptors. */ @@ -641,7 +660,7 @@ public final class Descriptors { FieldSet.FieldDescriptorLite<FieldDescriptor> { /** * Get the index of this descriptor within its parent. - * @see Descriptor#getIndex() + * @see Descriptors.Descriptor#getIndex() */ public int getIndex() { return index; } @@ -656,7 +675,7 @@ public final class Descriptors { /** * Get the field's fully-qualified name. - * @see Descriptor#getFullName() + * @see Descriptors.Descriptor#getFullName() */ public String getFullName() { return fullName; } @@ -943,7 +962,8 @@ public final class Descriptors { private void crossLink() throws DescriptorValidationException { if (proto.hasExtendee()) { final GenericDescriptor extendee = - file.pool.lookupSymbol(proto.getExtendee(), this); + file.pool.lookupSymbol(proto.getExtendee(), this, + DescriptorPool.SearchFilter.TYPES_ONLY); if (!(extendee instanceof Descriptor)) { throw new DescriptorValidationException(this, '\"' + proto.getExtendee() + "\" is not a message type."); @@ -960,7 +980,8 @@ public final class Descriptors { if (proto.hasTypeName()) { final GenericDescriptor typeDescriptor = - file.pool.lookupSymbol(proto.getTypeName(), this); + file.pool.lookupSymbol(proto.getTypeName(), this, + DescriptorPool.SearchFilter.TYPES_ONLY); if (!proto.hasType()) { // Choose field type based on symbol. @@ -1149,7 +1170,7 @@ public final class Descriptors { implements GenericDescriptor, Internal.EnumLiteMap<EnumValueDescriptor> { /** * Get the index of this descriptor within its parent. - * @see Descriptor#getIndex() + * @see Descriptors.Descriptor#getIndex() */ public int getIndex() { return index; } @@ -1161,7 +1182,7 @@ public final class Descriptors { /** * Get the type's fully-qualified name. - * @see Descriptor#getFullName() + * @see Descriptors.Descriptor#getFullName() */ public String getFullName() { return fullName; } @@ -1182,7 +1203,7 @@ public final class Descriptors { /** * Find an enum value by name. * @param name The unqualified name of the value (e.g. "FOO"). - * @return the value's decsriptor, or {@code null} if not found. + * @return the value's descriptor, or {@code null} if not found. */ public EnumValueDescriptor findValueByName(final String name) { final GenericDescriptor result = @@ -1198,7 +1219,7 @@ public final class Descriptors { * Find an enum value by number. If multiple enum values have the same * number, this returns the first defined value with that number. * @param number The value's number. - * @return the value's decsriptor, or {@code null} if not found. + * @return the value's descriptor, or {@code null} if not found. */ public EnumValueDescriptor findValueByNumber(final int number) { return file.pool.enumValuesByNumber.get( @@ -1261,7 +1282,7 @@ public final class Descriptors { implements GenericDescriptor, Internal.EnumLite { /** * Get the index of this descriptor within its parent. - * @see Descriptor#getIndex() + * @see Descriptors.Descriptor#getIndex() */ public int getIndex() { return index; } @@ -1276,7 +1297,7 @@ public final class Descriptors { /** * Get the value's fully-qualified name. - * @see Descriptor#getFullName() + * @see Descriptors.Descriptor#getFullName() */ public String getFullName() { return fullName; } @@ -1337,7 +1358,7 @@ public final class Descriptors { /** * Get the type's fully-qualified name. - * @see Descriptor#getFullName() + * @see Descriptors.Descriptor#getFullName() */ public String getFullName() { return fullName; } @@ -1355,7 +1376,7 @@ public final class Descriptors { /** * Find a method by name. * @param name The unqualified name of the method (e.g. "Foo"). - * @return the method's decsriptor, or {@code null} if not found. + * @return the method's descriptor, or {@code null} if not found. */ public MethodDescriptor findMethodByName(final String name) { final GenericDescriptor result = @@ -1427,7 +1448,7 @@ public final class Descriptors { /** * Get the method's fully-qualified name. - * @see Descriptor#getFullName() + * @see Descriptors.Descriptor#getFullName() */ public String getFullName() { return fullName; } @@ -1475,7 +1496,8 @@ public final class Descriptors { private void crossLink() throws DescriptorValidationException { final GenericDescriptor input = - file.pool.lookupSymbol(proto.getInputType(), this); + file.pool.lookupSymbol(proto.getInputType(), this, + DescriptorPool.SearchFilter.TYPES_ONLY); if (!(input instanceof Descriptor)) { throw new DescriptorValidationException(this, '\"' + proto.getInputType() + "\" is not a message type."); @@ -1483,7 +1505,8 @@ public final class Descriptors { inputType = (Descriptor)input; final GenericDescriptor output = - file.pool.lookupSymbol(proto.getOutputType(), this); + file.pool.lookupSymbol(proto.getOutputType(), this, + DescriptorPool.SearchFilter.TYPES_ONLY); if (!(output instanceof Descriptor)) { throw new DescriptorValidationException(this, '\"' + proto.getOutputType() + "\" is not a message type."); @@ -1535,7 +1558,7 @@ public final class Descriptors { public String getProblemSymbolName() { return name; } /** - * Gets the the protocol message representation of the invalid descriptor. + * Gets the protocol message representation of the invalid descriptor. */ public Message getProblemProto() { return proto; } @@ -1590,14 +1613,22 @@ public final class Descriptors { * descriptors defined in a particular file. */ private static final class DescriptorPool { + + /** Defines what subclass of descriptors to search in the descriptor pool. + */ + enum SearchFilter { + TYPES_ONLY, AGGREGATES_ONLY, ALL_SYMBOLS + } + DescriptorPool(final FileDescriptor[] dependencies) { - this.dependencies = new DescriptorPool[dependencies.length]; + this.dependencies = new HashSet<FileDescriptor>(); - for (int i = 0; i < dependencies.length; i++) { - this.dependencies[i] = dependencies[i].pool; + for (int i = 0; i < dependencies.length; i++) { + this.dependencies.add(dependencies[i]); + importPublicDependencies(dependencies[i]); } - for (final FileDescriptor dependency : dependencies) { + for (final FileDescriptor dependency : this.dependencies) { try { addPackage(dependency.getPackage(), dependency); } catch (DescriptorValidationException e) { @@ -1609,7 +1640,16 @@ public final class Descriptors { } } - private final DescriptorPool[] dependencies; + /** Find and put public dependencies of the file into dependencies set.*/ + private void importPublicDependencies(final FileDescriptor file) { + for (FileDescriptor dependency : file.getPublicDependencies()) { + if (dependencies.add(dependency)) { + importPublicDependencies(dependency); + } + } + } + + private final Set<FileDescriptor> dependencies; private final Map<String, GenericDescriptor> descriptorsByName = new HashMap<String, GenericDescriptor>(); @@ -1620,39 +1660,81 @@ public final class Descriptors { /** Find a generic descriptor by fully-qualified name. */ GenericDescriptor findSymbol(final String fullName) { + return findSymbol(fullName, SearchFilter.ALL_SYMBOLS); + } + + /** Find a descriptor by fully-qualified name and given option to only + * search valid field type descriptors. + */ + GenericDescriptor findSymbol(final String fullName, + final SearchFilter filter) { GenericDescriptor result = descriptorsByName.get(fullName); if (result != null) { - return result; + if ((filter==SearchFilter.ALL_SYMBOLS) || + ((filter==SearchFilter.TYPES_ONLY) && isType(result)) || + ((filter==SearchFilter.AGGREGATES_ONLY) && isAggregate(result))) { + return result; + } } - for (final DescriptorPool dependency : dependencies) { - result = dependency.descriptorsByName.get(fullName); + for (final FileDescriptor dependency : dependencies) { + result = dependency.pool.descriptorsByName.get(fullName); if (result != null) { - return result; + if ((filter==SearchFilter.ALL_SYMBOLS) || + ((filter==SearchFilter.TYPES_ONLY) && isType(result)) || + ((filter==SearchFilter.AGGREGATES_ONLY) && isAggregate(result))) { + return result; + } } } return null; } + /** Checks if the descriptor is a valid type for a message field. */ + boolean isType(GenericDescriptor descriptor) { + return (descriptor instanceof Descriptor) || + (descriptor instanceof EnumDescriptor); + } + + /** Checks if the descriptor is a valid namespace type. */ + boolean isAggregate(GenericDescriptor descriptor) { + return (descriptor instanceof Descriptor) || + (descriptor instanceof EnumDescriptor) || + (descriptor instanceof PackageDescriptor) || + (descriptor instanceof ServiceDescriptor); + } + /** - * Look up a descriptor by name, relative to some other descriptor. + * Look up a type descriptor by name, relative to some other descriptor. * The name may be fully-qualified (with a leading '.'), * partially-qualified, or unqualified. C++-like name lookup semantics * are used to search for the matching descriptor. */ GenericDescriptor lookupSymbol(final String name, - final GenericDescriptor relativeTo) + final GenericDescriptor relativeTo, + final DescriptorPool.SearchFilter filter) throws DescriptorValidationException { // TODO(kenton): This could be optimized in a number of ways. GenericDescriptor result; if (name.startsWith(".")) { // Fully-qualified name. - result = findSymbol(name.substring(1)); + result = findSymbol(name.substring(1), filter); } else { // If "name" is a compound identifier, we want to search for the // first component of it, then search within it for the rest. + // If name is something like "Foo.Bar.baz", and symbols named "Foo" are + // defined in multiple parent scopes, we only want to find "Bar.baz" in + // the innermost one. E.g., the following should produce an error: + // message Bar { message Baz {} } + // message Foo { + // message Bar { + // } + // optional Bar.Baz baz = 1; + // } + // So, we look for just "Foo" first, then look for "Bar.baz" within it + // if found. final int firstPartLength = name.indexOf('.'); final String firstPart; if (firstPartLength == -1) { @@ -1670,14 +1752,15 @@ public final class Descriptors { // Chop off the last component of the scope. final int dotpos = scopeToTry.lastIndexOf("."); if (dotpos == -1) { - result = findSymbol(name); + result = findSymbol(name, filter); break; } else { scopeToTry.setLength(dotpos + 1); - // Append firstPart and try to find. + // Append firstPart and try to find scopeToTry.append(firstPart); - result = findSymbol(scopeToTry.toString()); + result = findSymbol(scopeToTry.toString(), + DescriptorPool.SearchFilter.AGGREGATES_ONLY); if (result != null) { if (firstPartLength != -1) { @@ -1686,7 +1769,7 @@ public final class Descriptors { // searching parent scopes. scopeToTry.setLength(dotpos + 1); scopeToTry.append(name); - result = findSymbol(scopeToTry.toString()); + result = findSymbol(scopeToTry.toString(), filter); } break; } @@ -1817,7 +1900,7 @@ public final class Descriptors { /** * Adds a field to the fieldsByNumber table. Throws an exception if a - * field with hte same containing type and number already exists. + * field with the same containing type and number already exists. */ void addFieldByNumber(final FieldDescriptor field) throws DescriptorValidationException { diff --git a/java/src/main/java/com/google/protobuf/DynamicMessage.java b/java/src/main/java/com/google/protobuf/DynamicMessage.java index c106b662..c0c9fc94 100644 --- a/java/src/main/java/com/google/protobuf/DynamicMessage.java +++ b/java/src/main/java/com/google/protobuf/DynamicMessage.java @@ -35,6 +35,7 @@ import com.google.protobuf.Descriptors.FieldDescriptor; import java.io.InputStream; import java.io.IOException; +import java.util.Collections; import java.util.Map; /** @@ -160,7 +161,9 @@ public final class DynamicMessage extends AbstractMessage { verifyContainingType(field); Object result = fields.getField(field); if (result == null) { - if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { + if (field.isRepeated()) { + result = Collections.emptyList(); + } else if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { result = getDefaultInstance(field.getMessageType()); } else { result = field.getDefaultValue(); @@ -198,10 +201,12 @@ public final class DynamicMessage extends AbstractMessage { return fields.isInitialized(); } + @Override public boolean isInitialized() { return isInitialized(type, fields); } + @Override public void writeTo(CodedOutputStream output) throws IOException { if (type.getOptions().getMessageSetWireFormat()) { fields.writeMessageSetTo(output); @@ -212,6 +217,7 @@ public final class DynamicMessage extends AbstractMessage { } } + @Override public int getSerializedSize() { int size = memoizedSize; if (size != -1) return size; @@ -236,6 +242,26 @@ public final class DynamicMessage extends AbstractMessage { return newBuilderForType().mergeFrom(this); } + public Parser<DynamicMessage> getParserForType() { + return new AbstractParser<DynamicMessage>() { + public DynamicMessage parsePartialFrom( + CodedInputStream input, + ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException { + Builder builder = newBuilder(type); + try { + builder.mergeFrom(input, extensionRegistry); + } catch (InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(builder.buildPartial()); + } catch (IOException e) { + throw new InvalidProtocolBufferException(e.getMessage()) + .setUnfinishedMessage(builder.buildPartial()); + } + return builder.buildPartial(); + } + }; + } + /** Verifies that the field is a field of this message. */ private void verifyContainingType(FieldDescriptor field) { if (field.getContainingType() != type) { @@ -264,14 +290,18 @@ public final class DynamicMessage extends AbstractMessage { // --------------------------------------------------------------- // Implementation of Message.Builder interface. + @Override public Builder clear() { - if (fields == null) { - throw new IllegalStateException("Cannot call clear() after build()."); + if (fields.isImmutable()) { + fields = FieldSet.newFieldSet(); + } else { + fields.clear(); } - fields.clear(); + unknownFields = UnknownFieldSet.getDefaultInstance(); return this; } + @Override public Builder mergeFrom(Message other) { if (other instanceof DynamicMessage) { // This should be somewhat faster than calling super.mergeFrom(). @@ -280,6 +310,7 @@ public final class DynamicMessage extends AbstractMessage { throw new IllegalArgumentException( "mergeFrom(Message) can only merge messages of the same type."); } + ensureIsMutable(); fields.mergeFrom(otherDynamicMessage.fields); mergeUnknownFields(otherDynamicMessage.unknownFields); return this; @@ -289,8 +320,7 @@ public final class DynamicMessage extends AbstractMessage { } public DynamicMessage build() { - // If fields == null, we'll throw an appropriate exception later. - if (fields != null && !isInitialized()) { + if (!isInitialized()) { throw newUninitializedMessageException( new DynamicMessage(type, fields, unknownFields)); } @@ -312,21 +342,17 @@ public final class DynamicMessage extends AbstractMessage { } public DynamicMessage buildPartial() { - if (fields == null) { - throw new IllegalStateException( - "build() has already been called on this Builder."); - } fields.makeImmutable(); DynamicMessage result = new DynamicMessage(type, fields, unknownFields); - fields = null; - unknownFields = null; return result; } + @Override public Builder clone() { Builder result = new Builder(type); result.fields.mergeFrom(fields); + result.mergeUnknownFields(unknownFields); return result; } @@ -377,12 +403,14 @@ public final class DynamicMessage extends AbstractMessage { public Builder setField(FieldDescriptor field, Object value) { verifyContainingType(field); + ensureIsMutable(); fields.setField(field, value); return this; } public Builder clearField(FieldDescriptor field) { verifyContainingType(field); + ensureIsMutable(); fields.clearField(field); return this; } @@ -400,12 +428,14 @@ public final class DynamicMessage extends AbstractMessage { public Builder setRepeatedField(FieldDescriptor field, int index, Object value) { verifyContainingType(field); + ensureIsMutable(); fields.setRepeatedField(field, index, value); return this; } public Builder addRepeatedField(FieldDescriptor field, Object value) { verifyContainingType(field); + ensureIsMutable(); fields.addRepeatedField(field, value); return this; } @@ -419,6 +449,7 @@ public final class DynamicMessage extends AbstractMessage { return this; } + @Override public Builder mergeUnknownFields(UnknownFieldSet unknownFields) { this.unknownFields = UnknownFieldSet.newBuilder(this.unknownFields) @@ -434,5 +465,18 @@ public final class DynamicMessage extends AbstractMessage { "FieldDescriptor does not match message type."); } } + + private void ensureIsMutable() { + if (fields.isImmutable()) { + fields = fields.clone(); + } + } + + @Override + public com.google.protobuf.Message.Builder getFieldBuilder(FieldDescriptor field) { + // TODO(xiangl): need implementation for dynamic message + throw new UnsupportedOperationException( + "getFieldBuilder() called on a dynamic message type."); + } } } diff --git a/java/src/main/java/com/google/protobuf/ExtensionRegistryLite.java b/java/src/main/java/com/google/protobuf/ExtensionRegistryLite.java index d5288dd8..1e1289d0 100644 --- a/java/src/main/java/com/google/protobuf/ExtensionRegistryLite.java +++ b/java/src/main/java/com/google/protobuf/ExtensionRegistryLite.java @@ -43,7 +43,7 @@ import java.util.Map; * make sense to mix the two, since if you have any regular types in your * program, you then require the full runtime and lose all the benefits of * the lite runtime, so you might as well make all your types be regular types. - * However, in some cases (e.g. when depending on multiple third-patry libraries + * However, in some cases (e.g. when depending on multiple third-party libraries * where one uses lite types and one uses regular), you may find yourself * wanting to mix the two. In this case things get more complicated. * <p> @@ -71,6 +71,22 @@ import java.util.Map; * @author kenton@google.com Kenton Varda */ public class ExtensionRegistryLite { + + // Set true to enable lazy parsing feature for MessageSet. + // + // TODO(xiangl): Now we use a global flag to control whether enable lazy + // parsing feature for MessageSet, which may be too crude for some + // applications. Need to support this feature on smaller granularity. + private static volatile boolean eagerlyParseMessageSets = false; + + public static boolean isEagerlyParseMessageSets() { + return eagerlyParseMessageSets; + } + + public static void setEagerlyParseMessageSets(boolean isEagerlyParse) { + eagerlyParseMessageSets = isEagerlyParse; + } + /** Construct a new, empty instance. */ public static ExtensionRegistryLite newInstance() { return new ExtensionRegistryLite(); diff --git a/java/src/main/java/com/google/protobuf/FieldSet.java b/java/src/main/java/com/google/protobuf/FieldSet.java index a85dbaa6..2663694f 100644 --- a/java/src/main/java/com/google/protobuf/FieldSet.java +++ b/java/src/main/java/com/google/protobuf/FieldSet.java @@ -30,12 +30,14 @@ package com.google.protobuf; +import com.google.protobuf.LazyField.LazyIterator; + +import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.Map; -import java.io.IOException; /** * A class which represents an arbitrary set of fields of some message type. @@ -68,6 +70,7 @@ final class FieldSet<FieldDescriptorType extends private final SmallSortedMap<FieldDescriptorType, Object> fields; private boolean isImmutable; + private boolean hasLazyField = false; /** Construct a new FieldSet. */ private FieldSet() { @@ -95,7 +98,7 @@ final class FieldSet<FieldDescriptorType extends FieldSet<T> emptySet() { return DEFAULT_INSTANCE; } - @SuppressWarnings("unchecked") + @SuppressWarnings("rawtypes") private static final FieldSet DEFAULT_INSTANCE = new FieldSet(true); /** Make this FieldSet immutable from this point forward. */ @@ -109,7 +112,7 @@ final class FieldSet<FieldDescriptorType extends } /** - * Retuns whether the FieldSet is immutable. This is true if it is the + * Returns whether the FieldSet is immutable. This is true if it is the * {@link #emptySet} or if {@link #makeImmutable} were called. * * @return whether the FieldSet is immutable. @@ -139,6 +142,7 @@ final class FieldSet<FieldDescriptorType extends FieldDescriptorType descriptor = entry.getKey(); clone.setField(descriptor, entry.getValue()); } + clone.hasLazyField = hasLazyField; return clone; } @@ -147,21 +151,52 @@ final class FieldSet<FieldDescriptorType extends /** See {@link Message.Builder#clear()}. */ public void clear() { fields.clear(); + hasLazyField = false; } /** * Get a simple map containing all the fields. */ public Map<FieldDescriptorType, Object> getAllFields() { + if (hasLazyField) { + SmallSortedMap<FieldDescriptorType, Object> result = + SmallSortedMap.newFieldMap(16); + for (int i = 0; i < fields.getNumArrayEntries(); i++) { + cloneFieldEntry(result, fields.getArrayEntryAt(i)); + } + for (Map.Entry<FieldDescriptorType, Object> entry : + fields.getOverflowEntries()) { + cloneFieldEntry(result, entry); + } + if (fields.isImmutable()) { + result.makeImmutable(); + } + return result; + } return fields.isImmutable() ? fields : Collections.unmodifiableMap(fields); } + private void cloneFieldEntry(Map<FieldDescriptorType, Object> map, + Map.Entry<FieldDescriptorType, Object> entry) { + FieldDescriptorType key = entry.getKey(); + Object value = entry.getValue(); + if (value instanceof LazyField) { + map.put(key, ((LazyField) value).getValue()); + } else { + map.put(key, value); + } + } + /** * Get an iterator to the field map. This iterator should not be leaked out - * of the protobuf library as it is not protected from mutation when - * fields is not immutable. + * of the protobuf library as it is not protected from mutation when fields + * is not immutable. */ public Iterator<Map.Entry<FieldDescriptorType, Object>> iterator() { + if (hasLazyField) { + return new LazyIterator<FieldDescriptorType>( + fields.entrySet().iterator()); + } return fields.entrySet().iterator(); } @@ -185,14 +220,18 @@ final class FieldSet<FieldDescriptorType extends * to the caller to fetch the field's default value. */ public Object getField(final FieldDescriptorType descriptor) { - return fields.get(descriptor); + Object o = fields.get(descriptor); + if (o instanceof LazyField) { + return ((LazyField) o).getValue(); + } + return o; } /** * Useful for implementing * {@link Message.Builder#setField(Descriptors.FieldDescriptor,Object)}. */ - @SuppressWarnings("unchecked") + @SuppressWarnings({"unchecked", "rawtypes"}) public void setField(final FieldDescriptorType descriptor, Object value) { if (descriptor.isRepeated()) { @@ -204,7 +243,7 @@ final class FieldSet<FieldDescriptorType extends // Wrap the contents in a new list so that the caller cannot change // the list's contents after setting it. final List newList = new ArrayList(); - newList.addAll((List)value); + newList.addAll((List) value); for (final Object element : newList) { verifyType(descriptor.getLiteType(), element); } @@ -213,6 +252,9 @@ final class FieldSet<FieldDescriptorType extends verifyType(descriptor.getLiteType(), value); } + if (value instanceof LazyField) { + hasLazyField = true; + } fields.put(descriptor, value); } @@ -222,6 +264,9 @@ final class FieldSet<FieldDescriptorType extends */ public void clearField(final FieldDescriptorType descriptor) { fields.remove(descriptor); + if (fields.isEmpty()) { + hasLazyField = false; + } } /** @@ -234,7 +279,7 @@ final class FieldSet<FieldDescriptorType extends "getRepeatedField() can only be called on repeated fields."); } - final Object value = fields.get(descriptor); + final Object value = getField(descriptor); if (value == null) { return 0; } else { @@ -253,7 +298,7 @@ final class FieldSet<FieldDescriptorType extends "getRepeatedField() can only be called on repeated fields."); } - final Object value = fields.get(descriptor); + final Object value = getField(descriptor); if (value == null) { throw new IndexOutOfBoundsException(); @@ -275,13 +320,13 @@ final class FieldSet<FieldDescriptorType extends "getRepeatedField() can only be called on repeated fields."); } - final Object list = fields.get(descriptor); + final Object list = getField(descriptor); if (list == null) { throw new IndexOutOfBoundsException(); } verifyType(descriptor.getLiteType(), value); - ((List) list).set(index, value); + ((List<Object>) list).set(index, value); } /** @@ -298,13 +343,13 @@ final class FieldSet<FieldDescriptorType extends verifyType(descriptor.getLiteType(), value); - final Object existingValue = fields.get(descriptor); - List list; + final Object existingValue = getField(descriptor); + List<Object> list; if (existingValue == null) { - list = new ArrayList(); + list = new ArrayList<Object>(); fields.put(descriptor, list); } else { - list = (List) existingValue; + list = (List<Object>) existingValue; } list.add(value); @@ -338,7 +383,8 @@ final class FieldSet<FieldDescriptorType extends break; case MESSAGE: // TODO(kenton): Caller must do type checking here, I guess. - isValid = value instanceof MessageLite; + isValid = + (value instanceof MessageLite) || (value instanceof LazyField); break; } @@ -392,8 +438,16 @@ final class FieldSet<FieldDescriptorType extends } } } else { - if (!((MessageLite) entry.getValue()).isInitialized()) { - return false; + Object value = entry.getValue(); + if (value instanceof MessageLite) { + if (!((MessageLite) value).isInitialized()) { + return false; + } + } else if (value instanceof LazyField) { + return true; + } else { + throw new IllegalArgumentException( + "Wrong object type used with protocol message reflection."); } } } @@ -416,7 +470,8 @@ final class FieldSet<FieldDescriptorType extends } /** - * Like {@link #mergeFrom(Message)}, but merges from another {@link FieldSet}. + * Like {@link Message.Builder#mergeFrom(Message)}, but merges from another + * {@link FieldSet}. */ public void mergeFrom(final FieldSet<FieldDescriptorType> other) { for (int i = 0; i < other.fields.getNumArrayEntries(); i++) { @@ -428,14 +483,17 @@ final class FieldSet<FieldDescriptorType extends } } - @SuppressWarnings("unchecked") + @SuppressWarnings({"unchecked", "rawtypes"}) private void mergeFromField( final Map.Entry<FieldDescriptorType, Object> entry) { final FieldDescriptorType descriptor = entry.getKey(); - final Object otherValue = entry.getValue(); + Object otherValue = entry.getValue(); + if (otherValue instanceof LazyField) { + otherValue = ((LazyField) otherValue).getValue(); + } if (descriptor.isRepeated()) { - Object value = fields.get(descriptor); + Object value = getField(descriptor); if (value == null) { // Our list is empty, but we still need to make a defensive copy of // the other list since we don't know if the other FieldSet is still @@ -446,7 +504,7 @@ final class FieldSet<FieldDescriptorType extends ((List) value).addAll((List) otherValue); } } else if (descriptor.getLiteJavaType() == WireFormat.JavaType.MESSAGE) { - Object value = fields.get(descriptor); + Object value = getField(descriptor); if (value == null) { fields.put(descriptor, otherValue); } else { @@ -457,7 +515,6 @@ final class FieldSet<FieldDescriptorType extends ((MessageLite) value).toBuilder(), (MessageLite) otherValue) .build()); } - } else { fields.put(descriptor, otherValue); } @@ -646,7 +703,11 @@ final class FieldSet<FieldDescriptorType extends } } } else { - writeElement(output, type, number, value); + if (value instanceof LazyField) { + writeElement(output, type, number, ((LazyField) value).getValue()); + } else { + writeElement(output, type, number, value); + } } } @@ -686,12 +747,18 @@ final class FieldSet<FieldDescriptorType extends private int getMessageSetSerializedSize( final Map.Entry<FieldDescriptorType, Object> entry) { final FieldDescriptorType descriptor = entry.getKey(); - if (descriptor.getLiteJavaType() == WireFormat.JavaType.MESSAGE && - !descriptor.isRepeated() && !descriptor.isPacked()) { - return CodedOutputStream.computeMessageSetExtensionSize( - entry.getKey().getNumber(), (MessageLite) entry.getValue()); + Object value = entry.getValue(); + if (descriptor.getLiteJavaType() == WireFormat.JavaType.MESSAGE + && !descriptor.isRepeated() && !descriptor.isPacked()) { + if (value instanceof LazyField) { + return CodedOutputStream.computeLazyFieldMessageSetExtensionSize( + entry.getKey().getNumber(), (LazyField) value); + } else { + return CodedOutputStream.computeMessageSetExtensionSize( + entry.getKey().getNumber(), (MessageLite) value); + } } else { - return computeFieldSize(descriptor, entry.getValue()); + return computeFieldSize(descriptor, value); } } @@ -741,7 +808,6 @@ final class FieldSet<FieldDescriptorType extends case BOOL : return CodedOutputStream.computeBoolSizeNoTag ((Boolean )value); case STRING : return CodedOutputStream.computeStringSizeNoTag ((String )value); case GROUP : return CodedOutputStream.computeGroupSizeNoTag ((MessageLite)value); - case MESSAGE : return CodedOutputStream.computeMessageSizeNoTag ((MessageLite)value); case BYTES : return CodedOutputStream.computeBytesSizeNoTag ((ByteString )value); case UINT32 : return CodedOutputStream.computeUInt32SizeNoTag ((Integer )value); case SFIXED32: return CodedOutputStream.computeSFixed32SizeNoTag((Integer )value); @@ -749,6 +815,13 @@ final class FieldSet<FieldDescriptorType extends case SINT32 : return CodedOutputStream.computeSInt32SizeNoTag ((Integer )value); case SINT64 : return CodedOutputStream.computeSInt64SizeNoTag ((Long )value); + case MESSAGE: + if (value instanceof LazyField) { + return CodedOutputStream.computeLazyFieldSizeNoTag((LazyField) value); + } else { + return CodedOutputStream.computeMessageSizeNoTag((MessageLite) value); + } + case ENUM: return CodedOutputStream.computeEnumSizeNoTag( ((Internal.EnumLite) value).getNumber()); diff --git a/java/src/main/java/com/google/protobuf/GeneratedMessage.java b/java/src/main/java/com/google/protobuf/GeneratedMessage.java index b5eaded5..0c15ca84 100644 --- a/java/src/main/java/com/google/protobuf/GeneratedMessage.java +++ b/java/src/main/java/com/google/protobuf/GeneratedMessage.java @@ -58,8 +58,6 @@ public abstract class GeneratedMessage extends AbstractMessage implements Serializable { private static final long serialVersionUID = 1L; - private final UnknownFieldSet unknownFields; - /** * For testing. Allows a test to disable the optimization that avoids using * field builders for nested messages until they are requested. By disabling @@ -68,11 +66,14 @@ public abstract class GeneratedMessage extends AbstractMessage protected static boolean alwaysUseFieldBuilders = false; protected GeneratedMessage() { - this.unknownFields = UnknownFieldSet.getDefaultInstance(); } protected GeneratedMessage(Builder<?> builder) { - this.unknownFields = builder.getUnknownFields(); + } + + public Parser<? extends Message> getParserForType() { + throw new UnsupportedOperationException( + "This is supposed to be overridden by subclasses."); } /** @@ -175,8 +176,28 @@ public abstract class GeneratedMessage extends AbstractMessage } //@Override (Java 1.6 override semantics, but we must support 1.5) - public final UnknownFieldSet getUnknownFields() { - return unknownFields; + public UnknownFieldSet getUnknownFields() { + throw new UnsupportedOperationException( + "This is supposed to be overridden by subclasses."); + } + + /** + * Called by subclasses to parse an unknown field. + * @return {@code true} unless the tag is an end-group tag. + */ + protected boolean parseUnknownField( + CodedInputStream input, + UnknownFieldSet.Builder unknownFields, + ExtensionRegistryLite extensionRegistry, + int tag) throws IOException { + return unknownFields.mergeFieldFrom(tag, input); + } + + /** + * Used by parsing constructors in generated classes. + */ + protected void makeExtensionsImmutable() { + // Noop for messages without extensions. } protected abstract Message.Builder newBuilderForType(BuilderParent parent); @@ -319,6 +340,11 @@ public abstract class GeneratedMessage extends AbstractMessage } //@Override (Java 1.6 override semantics, but we must support 1.5) + public Message.Builder getFieldBuilder(final FieldDescriptor field) { + return internalGetFieldAccessorTable().getField(field).getBuilder(this); + } + + //@Override (Java 1.6 override semantics, but we must support 1.5) public boolean hasField(final FieldDescriptor field) { return internalGetFieldAccessorTable().getField(field).has(this); } @@ -626,6 +652,25 @@ public abstract class GeneratedMessage extends AbstractMessage return super.isInitialized() && extensionsAreInitialized(); } + @Override + protected boolean parseUnknownField( + CodedInputStream input, + UnknownFieldSet.Builder unknownFields, + ExtensionRegistryLite extensionRegistry, + int tag) throws IOException { + return AbstractMessage.Builder.mergeFieldFrom( + input, unknownFields, extensionRegistry, getDescriptorForType(), + null, extensions, tag); + } + + /** + * Used by parsing constructors in generated classes. + */ + @Override + protected void makeExtensionsImmutable() { + extensions.makeImmutable(); + } + /** * Used by subclasses to serialize extensions. Extension ranges may be * interleaved with field numbers, but we must write them in canonical @@ -655,9 +700,21 @@ public abstract class GeneratedMessage extends AbstractMessage if (messageSetWireFormat && descriptor.getLiteJavaType() == WireFormat.JavaType.MESSAGE && !descriptor.isRepeated()) { - output.writeMessageSetExtension(descriptor.getNumber(), - (Message) next.getValue()); + if (next instanceof LazyField.LazyEntry<?>) { + output.writeRawMessageSetExtension(descriptor.getNumber(), + ((LazyField.LazyEntry<?>) next).getField().toByteString()); + } else { + output.writeMessageSetExtension(descriptor.getNumber(), + (Message) next.getValue()); + } } else { + // TODO(xiangl): Taken care of following code, it may cause + // problem when we use LazyField for normal fields/extensions. + // Due to the optional field can be duplicated at the end of + // serialized bytes, which will make the serialized size change + // after lazy field parsed. So when we use LazyField globally, + // we need to change the following write method to write cached + // bytes directly rather than write the parsed message. FieldSet.writeField(descriptor, next.getValue(), output); } if (iter.hasNext()) { @@ -974,7 +1031,8 @@ public abstract class GeneratedMessage extends AbstractMessage final ExtensionRegistryLite extensionRegistry, final int tag) throws IOException { return AbstractMessage.Builder.mergeFieldFrom( - input, unknownFields, extensionRegistry, this, tag); + input, unknownFields, extensionRegistry, getDescriptorForType(), + this, null, tag); } // --------------------------------------------------------------- @@ -1405,39 +1463,72 @@ public abstract class GeneratedMessage extends AbstractMessage final String[] camelCaseNames, final Class<? extends GeneratedMessage> messageClass, final Class<? extends Builder> builderClass) { + this(descriptor, camelCaseNames); + ensureFieldAccessorsInitialized(messageClass, builderClass); + } + + /** + * Construct a FieldAccessorTable for a particular message class without + * initializing FieldAccessors. + */ + public FieldAccessorTable( + final Descriptor descriptor, + final String[] camelCaseNames) { this.descriptor = descriptor; + this.camelCaseNames = camelCaseNames; fields = new FieldAccessor[descriptor.getFields().size()]; + initialized = false; + } - for (int i = 0; i < fields.length; i++) { - final FieldDescriptor field = descriptor.getFields().get(i); - if (field.isRepeated()) { - if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { - fields[i] = new RepeatedMessageFieldAccessor( - field, camelCaseNames[i], messageClass, builderClass); - } else if (field.getJavaType() == FieldDescriptor.JavaType.ENUM) { - fields[i] = new RepeatedEnumFieldAccessor( - field, camelCaseNames[i], messageClass, builderClass); - } else { - fields[i] = new RepeatedFieldAccessor( - field, camelCaseNames[i], messageClass, builderClass); - } - } else { - if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { - fields[i] = new SingularMessageFieldAccessor( - field, camelCaseNames[i], messageClass, builderClass); - } else if (field.getJavaType() == FieldDescriptor.JavaType.ENUM) { - fields[i] = new SingularEnumFieldAccessor( - field, camelCaseNames[i], messageClass, builderClass); + /** + * Ensures the field accessors are initialized. This method is thread-safe. + * + * @param messageClass The message type. + * @param builderClass The builder type. + * @return this + */ + public FieldAccessorTable ensureFieldAccessorsInitialized( + Class<? extends GeneratedMessage> messageClass, + Class<? extends Builder> builderClass) { + if (initialized) { return this; } + synchronized (this) { + if (initialized) { return this; } + for (int i = 0; i < fields.length; i++) { + FieldDescriptor field = descriptor.getFields().get(i); + if (field.isRepeated()) { + if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { + fields[i] = new RepeatedMessageFieldAccessor( + field, camelCaseNames[i], messageClass, builderClass); + } else if (field.getJavaType() == FieldDescriptor.JavaType.ENUM) { + fields[i] = new RepeatedEnumFieldAccessor( + field, camelCaseNames[i], messageClass, builderClass); + } else { + fields[i] = new RepeatedFieldAccessor( + field, camelCaseNames[i], messageClass, builderClass); + } } else { - fields[i] = new SingularFieldAccessor( - field, camelCaseNames[i], messageClass, builderClass); + if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { + fields[i] = new SingularMessageFieldAccessor( + field, camelCaseNames[i], messageClass, builderClass); + } else if (field.getJavaType() == FieldDescriptor.JavaType.ENUM) { + fields[i] = new SingularEnumFieldAccessor( + field, camelCaseNames[i], messageClass, builderClass); + } else { + fields[i] = new SingularFieldAccessor( + field, camelCaseNames[i], messageClass, builderClass); + } } } + initialized = true; + camelCaseNames = null; + return this; } } private final Descriptor descriptor; private final FieldAccessor[] fields; + private String[] camelCaseNames; + private volatile boolean initialized; /** Get the FieldAccessor for a particular field. */ private FieldAccessor getField(final FieldDescriptor field) { @@ -1472,6 +1563,7 @@ public abstract class GeneratedMessage extends AbstractMessage int getRepeatedCount(GeneratedMessage.Builder builder); void clear(Builder builder); Message.Builder newBuilder(); + Message.Builder getBuilder(GeneratedMessage.Builder builder); } // --------------------------------------------------------------- @@ -1551,6 +1643,10 @@ public abstract class GeneratedMessage extends AbstractMessage throw new UnsupportedOperationException( "newBuilderForField() called on a non-Message type."); } + public Message.Builder getBuilder(GeneratedMessage.Builder builder) { + throw new UnsupportedOperationException( + "getFieldBuilder() called on a non-Message type."); + } } private static class RepeatedFieldAccessor implements FieldAccessor { @@ -1573,8 +1669,6 @@ public abstract class GeneratedMessage extends AbstractMessage "get" + camelCaseName + "List"); getMethodBuilder = getMethodOrDie(builderClass, "get" + camelCaseName + "List"); - - getRepeatedMethod = getMethodOrDie(messageClass, "get" + camelCaseName, Integer.TYPE); getRepeatedMethodBuilder = @@ -1625,11 +1719,11 @@ public abstract class GeneratedMessage extends AbstractMessage } public boolean has(final GeneratedMessage message) { throw new UnsupportedOperationException( - "hasField() called on a singular field."); + "hasField() called on a repeated field."); } public boolean has(GeneratedMessage.Builder builder) { throw new UnsupportedOperationException( - "hasField() called on a singular field."); + "hasField() called on a repeated field."); } public int getRepeatedCount(final GeneratedMessage message) { return (Integer) invokeOrDie(getCountMethod, message); @@ -1644,6 +1738,10 @@ public abstract class GeneratedMessage extends AbstractMessage throw new UnsupportedOperationException( "newBuilderForField() called on a non-Message type."); } + public Message.Builder getBuilder(GeneratedMessage.Builder builder) { + throw new UnsupportedOperationException( + "getFieldBuilder() called on a non-Message type."); + } } // --------------------------------------------------------------- @@ -1753,9 +1851,12 @@ public abstract class GeneratedMessage extends AbstractMessage super(descriptor, camelCaseName, messageClass, builderClass); newBuilderMethod = getMethodOrDie(type, "newBuilder"); + getBuilderMethodBuilder = + getMethodOrDie(builderClass, "get" + camelCaseName + "Builder"); } private final Method newBuilderMethod; + private final Method getBuilderMethodBuilder; private Object coerceType(final Object value) { if (type.isInstance(value)) { @@ -1766,7 +1867,7 @@ public abstract class GeneratedMessage extends AbstractMessage // DynamicMessage -- we should accept it. In this case we can make // a copy of the message. return ((Message.Builder) invokeOrDie(newBuilderMethod, null)) - .mergeFrom((Message) value).build(); + .mergeFrom((Message) value).buildPartial(); } } @@ -1778,6 +1879,10 @@ public abstract class GeneratedMessage extends AbstractMessage public Message.Builder newBuilder() { return (Message.Builder) invokeOrDie(newBuilderMethod, null); } + @Override + public Message.Builder getBuilder(GeneratedMessage.Builder builder) { + return (Message.Builder) invokeOrDie(getBuilderMethodBuilder, builder); + } } private static final class RepeatedMessageFieldAccessor @@ -1825,7 +1930,7 @@ public abstract class GeneratedMessage extends AbstractMessage /** * Replaces this object in the output stream with a serialized form. * Part of Java's serialization magic. Generated sub-classes must override - * this method by calling <code>return super.writeReplace();</code> + * this method by calling {@code return super.writeReplace();} * @return a SerializedForm of this message */ protected Object writeReplace() throws ObjectStreamException { diff --git a/java/src/main/java/com/google/protobuf/GeneratedMessageLite.java b/java/src/main/java/com/google/protobuf/GeneratedMessageLite.java index 1813e9b3..437e3412 100644 --- a/java/src/main/java/com/google/protobuf/GeneratedMessageLite.java +++ b/java/src/main/java/com/google/protobuf/GeneratedMessageLite.java @@ -55,6 +55,29 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite protected GeneratedMessageLite(Builder builder) { } + public Parser<? extends MessageLite> getParserForType() { + throw new UnsupportedOperationException( + "This is supposed to be overridden by subclasses."); + } + + /** + * Called by subclasses to parse an unknown field. + * @return {@code true} unless the tag is an end-group tag. + */ + protected boolean parseUnknownField( + CodedInputStream input, + ExtensionRegistryLite extensionRegistry, + int tag) throws IOException { + return input.skipField(tag); + } + + /** + * Used by parsing constructors in generated classes. + */ + protected void makeExtensionsImmutable() { + // Noop for messages without extensions. + } + @SuppressWarnings("unchecked") public abstract static class Builder<MessageType extends GeneratedMessageLite, BuilderType extends Builder> @@ -86,9 +109,9 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite * @return {@code true} unless the tag is an end-group tag. */ protected boolean parseUnknownField( - final CodedInputStream input, - final ExtensionRegistryLite extensionRegistry, - final int tag) throws IOException { + CodedInputStream input, + ExtensionRegistryLite extensionRegistry, + int tag) throws IOException { return input.skipField(tag); } } @@ -194,6 +217,31 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite } /** + * Called by subclasses to parse an unknown field or an extension. + * @return {@code true} unless the tag is an end-group tag. + */ + @Override + protected boolean parseUnknownField( + CodedInputStream input, + ExtensionRegistryLite extensionRegistry, + int tag) throws IOException { + return GeneratedMessageLite.parseUnknownField( + extensions, + getDefaultInstanceForType(), + input, + extensionRegistry, + tag); + } + + /** + * Used by parsing constructors in generated classes. + */ + @Override + protected void makeExtensionsImmutable() { + extensions.makeImmutable(); + } + + /** * Used by subclasses to serialize extensions. Extension ranges may be * interleaved with field numbers, but we must write them in canonical * (sorted by field number) order. ExtensionWriter helps us write @@ -400,121 +448,139 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite */ @Override protected boolean parseUnknownField( - final CodedInputStream input, - final ExtensionRegistryLite extensionRegistry, - final int tag) throws IOException { - final int wireType = WireFormat.getTagWireType(tag); - final int fieldNumber = WireFormat.getTagFieldNumber(tag); - - final GeneratedExtension<MessageType, ?> extension = - extensionRegistry.findLiteExtensionByNumber( - getDefaultInstanceForType(), fieldNumber); - - boolean unknown = false; - boolean packed = false; - if (extension == null) { - unknown = true; // Unknown field. - } else if (wireType == FieldSet.getWireFormatForFieldType( - extension.descriptor.getLiteType(), - false /* isPacked */)) { - packed = false; // Normal, unpacked value. - } else if (extension.descriptor.isRepeated && - extension.descriptor.type.isPackable() && - wireType == FieldSet.getWireFormatForFieldType( - extension.descriptor.getLiteType(), - true /* isPacked */)) { - packed = true; // Packed value. - } else { - unknown = true; // Wrong wire type. - } + CodedInputStream input, + ExtensionRegistryLite extensionRegistry, + int tag) throws IOException { + ensureExtensionsIsMutable(); + return GeneratedMessageLite.parseUnknownField( + extensions, + getDefaultInstanceForType(), + input, + extensionRegistry, + tag); + } - if (unknown) { // Unknown field or wrong wire type. Skip. - return input.skipField(tag); - } + protected final void mergeExtensionFields(final MessageType other) { + ensureExtensionsIsMutable(); + extensions.mergeFrom(((ExtendableMessage) other).extensions); + } + } - if (packed) { - final int length = input.readRawVarint32(); - final int limit = input.pushLimit(length); - if (extension.descriptor.getLiteType() == WireFormat.FieldType.ENUM) { - while (input.getBytesUntilLimit() > 0) { - final int rawValue = input.readEnum(); - final Object value = - extension.descriptor.getEnumType().findValueByNumber(rawValue); - if (value == null) { - // If the number isn't recognized as a valid value for this - // enum, drop it (don't even add it to unknownFields). - return true; - } - ensureExtensionsIsMutable(); - extensions.addRepeatedField(extension.descriptor, value); - } - } else { - while (input.getBytesUntilLimit() > 0) { - final Object value = - FieldSet.readPrimitiveField(input, - extension.descriptor.getLiteType()); - ensureExtensionsIsMutable(); - extensions.addRepeatedField(extension.descriptor, value); + // ----------------------------------------------------------------- + + /** + * Parse an unknown field or an extension. + * @return {@code true} unless the tag is an end-group tag. + */ + private static <MessageType extends MessageLite> + boolean parseUnknownField( + FieldSet<ExtensionDescriptor> extensions, + MessageType defaultInstance, + CodedInputStream input, + ExtensionRegistryLite extensionRegistry, + int tag) throws IOException { + int wireType = WireFormat.getTagWireType(tag); + int fieldNumber = WireFormat.getTagFieldNumber(tag); + + GeneratedExtension<MessageType, ?> extension = + extensionRegistry.findLiteExtensionByNumber( + defaultInstance, fieldNumber); + + boolean unknown = false; + boolean packed = false; + if (extension == null) { + unknown = true; // Unknown field. + } else if (wireType == FieldSet.getWireFormatForFieldType( + extension.descriptor.getLiteType(), + false /* isPacked */)) { + packed = false; // Normal, unpacked value. + } else if (extension.descriptor.isRepeated && + extension.descriptor.type.isPackable() && + wireType == FieldSet.getWireFormatForFieldType( + extension.descriptor.getLiteType(), + true /* isPacked */)) { + packed = true; // Packed value. + } else { + unknown = true; // Wrong wire type. + } + + if (unknown) { // Unknown field or wrong wire type. Skip. + return input.skipField(tag); + } + + if (packed) { + int length = input.readRawVarint32(); + int limit = input.pushLimit(length); + if (extension.descriptor.getLiteType() == WireFormat.FieldType.ENUM) { + while (input.getBytesUntilLimit() > 0) { + int rawValue = input.readEnum(); + Object value = + extension.descriptor.getEnumType().findValueByNumber(rawValue); + if (value == null) { + // If the number isn't recognized as a valid value for this + // enum, drop it (don't even add it to unknownFields). + return true; } + extensions.addRepeatedField(extension.descriptor, value); } - input.popLimit(limit); } else { - final Object value; - switch (extension.descriptor.getLiteJavaType()) { - case MESSAGE: { - MessageLite.Builder subBuilder = null; - if (!extension.descriptor.isRepeated()) { - MessageLite existingValue = - (MessageLite) extensions.getField(extension.descriptor); - if (existingValue != null) { - subBuilder = existingValue.toBuilder(); - } - } - if (subBuilder == null) { - subBuilder = extension.messageDefaultInstance.newBuilderForType(); - } - if (extension.descriptor.getLiteType() == - WireFormat.FieldType.GROUP) { - input.readGroup(extension.getNumber(), - subBuilder, extensionRegistry); - } else { - input.readMessage(subBuilder, extensionRegistry); + while (input.getBytesUntilLimit() > 0) { + Object value = + FieldSet.readPrimitiveField(input, + extension.descriptor.getLiteType()); + extensions.addRepeatedField(extension.descriptor, value); + } + } + input.popLimit(limit); + } else { + Object value; + switch (extension.descriptor.getLiteJavaType()) { + case MESSAGE: { + MessageLite.Builder subBuilder = null; + if (!extension.descriptor.isRepeated()) { + MessageLite existingValue = + (MessageLite) extensions.getField(extension.descriptor); + if (existingValue != null) { + subBuilder = existingValue.toBuilder(); } - value = subBuilder.build(); - break; } - case ENUM: - final int rawValue = input.readEnum(); - value = extension.descriptor.getEnumType() - .findValueByNumber(rawValue); - // If the number isn't recognized as a valid value for this enum, - // drop it. - if (value == null) { - return true; - } - break; - default: - value = FieldSet.readPrimitiveField(input, - extension.descriptor.getLiteType()); - break; - } - - if (extension.descriptor.isRepeated()) { - ensureExtensionsIsMutable(); - extensions.addRepeatedField(extension.descriptor, value); - } else { - ensureExtensionsIsMutable(); - extensions.setField(extension.descriptor, value); + if (subBuilder == null) { + subBuilder = extension.messageDefaultInstance.newBuilderForType(); + } + if (extension.descriptor.getLiteType() == + WireFormat.FieldType.GROUP) { + input.readGroup(extension.getNumber(), + subBuilder, extensionRegistry); + } else { + input.readMessage(subBuilder, extensionRegistry); + } + value = subBuilder.build(); + break; } + case ENUM: + int rawValue = input.readEnum(); + value = extension.descriptor.getEnumType() + .findValueByNumber(rawValue); + // If the number isn't recognized as a valid value for this enum, + // drop it. + if (value == null) { + return true; + } + break; + default: + value = FieldSet.readPrimitiveField(input, + extension.descriptor.getLiteType()); + break; } - return true; + if (extension.descriptor.isRepeated()) { + extensions.addRepeatedField(extension.descriptor, value); + } else { + extensions.setField(extension.descriptor, value); + } } - protected final void mergeExtensionFields(final MessageType other) { - ensureExtensionsIsMutable(); - extensions.mergeFrom(((ExtendableMessage) other).extensions); - } + return true; } // ----------------------------------------------------------------- @@ -722,7 +788,7 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite /** * Replaces this object in the output stream with a serialized form. * Part of Java's serialization magic. Generated sub-classes must override - * this method by calling <code>return super.writeReplace();</code> + * this method by calling {@code return super.writeReplace();} * @return a SerializedForm of this message */ protected Object writeReplace() throws ObjectStreamException { diff --git a/java/src/main/java/com/google/protobuf/Internal.java b/java/src/main/java/com/google/protobuf/Internal.java index 05eab57a..81af2583 100644 --- a/java/src/main/java/com/google/protobuf/Internal.java +++ b/java/src/main/java/com/google/protobuf/Internal.java @@ -103,85 +103,32 @@ public class Internal { * Helper called by generated code to determine if a byte array is a valid * UTF-8 encoded string such that the original bytes can be converted to * a String object and then back to a byte array round tripping the bytes - * without loss. - * <p> - * This is inspired by UTF_8.java in sun.nio.cs. + * without loss. More precisely, returns {@code true} whenever: + * <pre> {@code + * Arrays.equals(byteString.toByteArray(), + * new String(byteString.toByteArray(), "UTF-8").getBytes("UTF-8")) + * }</pre> + * + * <p>This method rejects "overlong" byte sequences, as well as + * 3-byte sequences that would map to a surrogate character, in + * accordance with the restricted definition of UTF-8 introduced in + * Unicode 3.1. Note that the UTF-8 decoder included in Oracle's + * JDK has been modified to also reject "overlong" byte sequences, + * but currently (2011) still accepts 3-byte surrogate character + * byte sequences. + * + * <p>See the Unicode Standard,</br> + * Table 3-6. <em>UTF-8 Bit Distribution</em>,</br> + * Table 3-7. <em>Well Formed UTF-8 Byte Sequences</em>. + * + * <p>As of 2011-02, this method simply returns the result of {@link + * ByteString#isValidUtf8()}. Calling that method directly is preferred. * * @param byteString the string to check * @return whether the byte array is round trippable */ public static boolean isValidUtf8(ByteString byteString) { - int index = 0; - int size = byteString.size(); - // To avoid the masking, we could change this to use bytes; - // Then X > 0xC2 gets turned into X < -0xC2; X < 0x80 - // gets turned into X >= 0, etc. - - while (index < size) { - int byte1 = byteString.byteAt(index++) & 0xFF; - if (byte1 < 0x80) { - // fast loop for single bytes - continue; - - // we know from this point on that we have 2-4 byte forms - } else if (byte1 < 0xC2 || byte1 > 0xF4) { - // catch illegal first bytes: < C2 or > F4 - return false; - } - if (index >= size) { - // fail if we run out of bytes - return false; - } - int byte2 = byteString.byteAt(index++) & 0xFF; - if (byte2 < 0x80 || byte2 > 0xBF) { - // general trail-byte test - return false; - } - if (byte1 <= 0xDF) { - // two-byte form; general trail-byte test is sufficient - continue; - } - - // we know from this point on that we have 3 or 4 byte forms - if (index >= size) { - // fail if we run out of bytes - return false; - } - int byte3 = byteString.byteAt(index++) & 0xFF; - if (byte3 < 0x80 || byte3 > 0xBF) { - // general trail-byte test - return false; - } - if (byte1 <= 0xEF) { - // three-byte form. Vastly more frequent than four-byte forms - // The following has an extra test, but not worth restructuring - if (byte1 == 0xE0 && byte2 < 0xA0 || - byte1 == 0xED && byte2 > 0x9F) { - // check special cases of byte2 - return false; - } - - } else { - // four-byte form - - if (index >= size) { - // fail if we run out of bytes - return false; - } - int byte4 = byteString.byteAt(index++) & 0xFF; - if (byte4 < 0x80 || byte4 > 0xBF) { - // general trail-byte test - return false; - } - // The following has an extra test, but not worth restructuring - if (byte1 == 0xF0 && byte2 < 0x90 || - byte1 == 0xF4 && byte2 > 0x8F) { - // check special cases of byte2 - return false; - } - } - } - return true; + return byteString.isValidUtf8(); } /** diff --git a/java/src/main/java/com/google/protobuf/InvalidProtocolBufferException.java b/java/src/main/java/com/google/protobuf/InvalidProtocolBufferException.java index 90f7ffbc..72d7ff7d 100644 --- a/java/src/main/java/com/google/protobuf/InvalidProtocolBufferException.java +++ b/java/src/main/java/com/google/protobuf/InvalidProtocolBufferException.java @@ -40,11 +40,32 @@ import java.io.IOException; */ public class InvalidProtocolBufferException extends IOException { private static final long serialVersionUID = -1616151763072450476L; + private MessageLite unfinishedMessage = null; public InvalidProtocolBufferException(final String description) { super(description); } + /** + * Attaches an unfinished message to the exception to support best-effort + * parsing in {@code Parser} interface. + * + * @return this + */ + public InvalidProtocolBufferException setUnfinishedMessage( + MessageLite unfinishedMessage) { + this.unfinishedMessage = unfinishedMessage; + return this; + } + + /** + * Returns the unfinished message attached to the exception, or null if + * no message is attached. + */ + public MessageLite getUnfinishedMessage() { + return unfinishedMessage; + } + static InvalidProtocolBufferException truncatedMessage() { return new InvalidProtocolBufferException( "While parsing a protocol message, the input ended unexpectedly " + diff --git a/java/src/main/java/com/google/protobuf/LazyField.java b/java/src/main/java/com/google/protobuf/LazyField.java new file mode 100644 index 00000000..df9425eb --- /dev/null +++ b/java/src/main/java/com/google/protobuf/LazyField.java @@ -0,0 +1,216 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// http://code.google.com/p/protobuf/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package com.google.protobuf; + +import java.io.IOException; +import java.util.Iterator; +import java.util.Map.Entry; + +/** + * LazyField encapsulates the logic of lazily parsing message fields. It stores + * the message in a ByteString initially and then parse it on-demand. + * + * LazyField is thread-compatible e.g. concurrent read are safe, however, + * synchronizations are needed under read/write situations. + * + * Now LazyField is only used to lazily load MessageSet. + * TODO(xiangl): Use LazyField to lazily load all messages. + * + * @author xiangl@google.com (Xiang Li) + */ +class LazyField { + + final private MessageLite defaultInstance; + final private ExtensionRegistryLite extensionRegistry; + + // Mutable because it is initialized lazily. + private ByteString bytes; + private volatile MessageLite value; + private volatile boolean isDirty = false; + + public LazyField(MessageLite defaultInstance, + ExtensionRegistryLite extensionRegistry, ByteString bytes) { + this.defaultInstance = defaultInstance; + this.extensionRegistry = extensionRegistry; + this.bytes = bytes; + } + + public MessageLite getValue() { + ensureInitialized(); + return value; + } + + /** + * LazyField is not thread-safe for write access. Synchronizations are needed + * under read/write situations. + */ + public MessageLite setValue(MessageLite value) { + MessageLite originalValue = this.value; + this.value = value; + bytes = null; + isDirty = true; + return originalValue; + } + + /** + * Due to the optional field can be duplicated at the end of serialized + * bytes, which will make the serialized size changed after LazyField + * parsed. Be careful when using this method. + */ + public int getSerializedSize() { + if (isDirty) { + return value.getSerializedSize(); + } + return bytes.size(); + } + + public ByteString toByteString() { + if (!isDirty) { + return bytes; + } + synchronized (this) { + if (!isDirty) { + return bytes; + } + bytes = value.toByteString(); + isDirty = false; + return bytes; + } + } + + @Override + public int hashCode() { + ensureInitialized(); + return value.hashCode(); + } + + @Override + public boolean equals(Object obj) { + ensureInitialized(); + return value.equals(obj); + } + + @Override + public String toString() { + ensureInitialized(); + return value.toString(); + } + + private void ensureInitialized() { + if (value != null) { + return; + } + synchronized (this) { + if (value != null) { + return; + } + try { + if (bytes != null) { + value = defaultInstance.getParserForType() + .parseFrom(bytes, extensionRegistry); + } + } catch (IOException e) { + // TODO(xiangl): Refactory the API to support the exception thrown from + // lazily load messages. + } + } + } + + // ==================================================== + + /** + * LazyEntry and LazyIterator are used to encapsulate the LazyField, when + * users iterate all fields from FieldSet. + */ + static class LazyEntry<K> implements Entry<K, Object> { + private Entry<K, LazyField> entry; + + private LazyEntry(Entry<K, LazyField> entry) { + this.entry = entry; + } + + @Override + public K getKey() { + return entry.getKey(); + } + + @Override + public Object getValue() { + LazyField field = entry.getValue(); + if (field == null) { + return null; + } + return field.getValue(); + } + + public LazyField getField() { + return entry.getValue(); + } + + @Override + public Object setValue(Object value) { + if (!(value instanceof MessageLite)) { + throw new IllegalArgumentException( + "LazyField now only used for MessageSet, " + + "and the value of MessageSet must be an instance of MessageLite"); + } + return entry.getValue().setValue((MessageLite) value); + } + } + + static class LazyIterator<K> implements Iterator<Entry<K, Object>> { + private Iterator<Entry<K, Object>> iterator; + + public LazyIterator(Iterator<Entry<K, Object>> iterator) { + this.iterator = iterator; + } + + @Override + public boolean hasNext() { + return iterator.hasNext(); + } + + @SuppressWarnings("unchecked") + @Override + public Entry<K, Object> next() { + Entry<K, ?> entry = iterator.next(); + if (entry.getValue() instanceof LazyField) { + return new LazyEntry<K>((Entry<K, LazyField>) entry); + } + return (Entry<K, Object>) entry; + } + + @Override + public void remove() { + iterator.remove(); + } + } +} diff --git a/java/src/main/java/com/google/protobuf/LazyStringArrayList.java b/java/src/main/java/com/google/protobuf/LazyStringArrayList.java index 1683a640..75c6a4b7 100644 --- a/java/src/main/java/com/google/protobuf/LazyStringArrayList.java +++ b/java/src/main/java/com/google/protobuf/LazyStringArrayList.java @@ -33,8 +33,9 @@ package com.google.protobuf; import java.util.List; import java.util.AbstractList; import java.util.ArrayList; -import java.util.RandomAccess; import java.util.Collection; +import java.util.Collections; +import java.util.RandomAccess; /** * An implementation of {@link LazyStringList} that wraps an ArrayList. Each @@ -72,6 +73,11 @@ public class LazyStringArrayList extends AbstractList<String> list = new ArrayList<Object>(); } + public LazyStringArrayList(LazyStringList from) { + list = new ArrayList<Object>(from.size()); + addAll(from); + } + public LazyStringArrayList(List<String> from) { list = new ArrayList<Object>(from); } @@ -84,7 +90,7 @@ public class LazyStringArrayList extends AbstractList<String> } else { ByteString bs = (ByteString) o; String s = bs.toStringUtf8(); - if (Internal.isValidUtf8(bs)) { + if (bs.isValidUtf8()) { list.set(index, s); } return s; @@ -109,8 +115,21 @@ public class LazyStringArrayList extends AbstractList<String> } @Override + public boolean addAll(Collection<? extends String> c) { + // The default implementation of AbstractCollection.addAll(Collection) + // delegates to add(Object). This implementation instead delegates to + // addAll(int, Collection), which makes a special case for Collections + // which are instances of LazyStringList. + return addAll(size(), c); + } + + @Override public boolean addAll(int index, Collection<? extends String> c) { - boolean ret = list.addAll(index, c); + // When copying from another LazyStringList, directly copy the underlying + // elements rather than forcing each element to be decoded to a String. + Collection<?> collection = c instanceof LazyStringList + ? ((LazyStringList) c).getUnderlyingElements() : c; + boolean ret = list.addAll(index, collection); modCount++; return ret; } @@ -152,4 +171,9 @@ public class LazyStringArrayList extends AbstractList<String> return ((ByteString) o).toStringUtf8(); } } + + @Override + public List<?> getUnderlyingElements() { + return Collections.unmodifiableList(list); + } } diff --git a/java/src/main/java/com/google/protobuf/LazyStringList.java b/java/src/main/java/com/google/protobuf/LazyStringList.java index 97139ca6..630932fe 100644 --- a/java/src/main/java/com/google/protobuf/LazyStringList.java +++ b/java/src/main/java/com/google/protobuf/LazyStringList.java @@ -33,7 +33,7 @@ package com.google.protobuf; import java.util.List; /** - * An interface extending List<String> that also provides access to the + * An interface extending {@code List<String>} that also provides access to the * items of the list as UTF8-encoded ByteString objects. This is used by the * protocol buffer implementation to support lazily converting bytes parsed * over the wire to String objects until needed and also increases the @@ -41,9 +41,9 @@ import java.util.List; * ByteString is already cached. * <p> * This only adds additional methods that are required for the use in the - * protocol buffer code in order to be able successfuly round trip byte arrays + * protocol buffer code in order to be able successfully round trip byte arrays * through parsing and serialization without conversion to strings. It's not - * attempting to support the functionality of say List<ByteString>, hence + * attempting to support the functionality of say {@code List<ByteString>}, hence * why only these two very specific methods are added. * * @author jonp@google.com (Jon Perlow) @@ -56,7 +56,7 @@ public interface LazyStringList extends List<String> { * @param index index of the element to return * @return the element at the specified position in this list * @throws IndexOutOfBoundsException if the index is out of range - * (<tt>index < 0 || index >= size()</tt>) + * ({@code index < 0 || index >= size()}) */ ByteString getByteString(int index); @@ -69,4 +69,13 @@ public interface LazyStringList extends List<String> { * is not supported by this list */ void add(ByteString element); + + /** + * Returns an unmodifiable List of the underlying elements, each of + * which is either a {@code String} or its equivalent UTF-8 encoded + * {@code ByteString}. It is an error for the caller to modify the returned + * List, and attempting to do so will result in an + * {@link UnsupportedOperationException}. + */ + List<?> getUnderlyingElements(); } diff --git a/java/src/main/java/com/google/protobuf/LiteralByteString.java b/java/src/main/java/com/google/protobuf/LiteralByteString.java new file mode 100644 index 00000000..93c53dce --- /dev/null +++ b/java/src/main/java/com/google/protobuf/LiteralByteString.java @@ -0,0 +1,349 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// http://code.google.com/p/protobuf/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package com.google.protobuf; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.UnsupportedEncodingException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; +import java.util.NoSuchElementException; + +/** + * This class implements a {@link com.google.protobuf.ByteString} backed by a + * single array of bytes, contiguous in memory. It supports substring by + * pointing to only a sub-range of the underlying byte array, meaning that a + * substring will reference the full byte-array of the string it's made from, + * exactly as with {@link String}. + * + * @author carlanton@google.com (Carl Haverl) + */ +class LiteralByteString extends ByteString { + + protected final byte[] bytes; + + /** + * Creates a {@code LiteralByteString} backed by the given array, without + * copying. + * + * @param bytes array to wrap + */ + LiteralByteString(byte[] bytes) { + this.bytes = bytes; + } + + @Override + public byte byteAt(int index) { + // Unlike most methods in this class, this one is a direct implementation + // ignoring the potential offset because we need to do range-checking in the + // substring case anyway. + return bytes[index]; + } + + @Override + public int size() { + return bytes.length; + } + + // ================================================================= + // ByteString -> substring + + @Override + public ByteString substring(int beginIndex, int endIndex) { + if (beginIndex < 0) { + throw new IndexOutOfBoundsException( + "Beginning index: " + beginIndex + " < 0"); + } + if (endIndex > size()) { + throw new IndexOutOfBoundsException("End index: " + endIndex + " > " + + size()); + } + int substringLength = endIndex - beginIndex; + if (substringLength < 0) { + throw new IndexOutOfBoundsException( + "Beginning index larger than ending index: " + beginIndex + ", " + + endIndex); + } + + ByteString result; + if (substringLength == 0) { + result = ByteString.EMPTY; + } else { + result = new BoundedByteString(bytes, getOffsetIntoBytes() + beginIndex, + substringLength); + } + return result; + } + + // ================================================================= + // ByteString -> byte[] + + @Override + protected void copyToInternal(byte[] target, int sourceOffset, + int targetOffset, int numberToCopy) { + // Optimized form, not for subclasses, since we don't call + // getOffsetIntoBytes() or check the 'numberToCopy' parameter. + System.arraycopy(bytes, sourceOffset, target, targetOffset, numberToCopy); + } + + @Override + public void copyTo(ByteBuffer target) { + target.put(bytes, getOffsetIntoBytes(), size()); // Copies bytes + } + + @Override + public ByteBuffer asReadOnlyByteBuffer() { + ByteBuffer byteBuffer = + ByteBuffer.wrap(bytes, getOffsetIntoBytes(), size()); + return byteBuffer.asReadOnlyBuffer(); + } + + @Override + public List<ByteBuffer> asReadOnlyByteBufferList() { + // Return the ByteBuffer generated by asReadOnlyByteBuffer() as a singleton + List<ByteBuffer> result = new ArrayList<ByteBuffer>(1); + result.add(asReadOnlyByteBuffer()); + return result; + } + + @Override + public void writeTo(OutputStream outputStream) throws IOException { + outputStream.write(toByteArray()); + } + + @Override + public String toString(String charsetName) + throws UnsupportedEncodingException { + return new String(bytes, getOffsetIntoBytes(), size(), charsetName); + } + + // ================================================================= + // UTF-8 decoding + + @Override + public boolean isValidUtf8() { + int offset = getOffsetIntoBytes(); + return Utf8.isValidUtf8(bytes, offset, offset + size()); + } + + @Override + protected int partialIsValidUtf8(int state, int offset, int length) { + int index = getOffsetIntoBytes() + offset; + return Utf8.partialIsValidUtf8(state, bytes, index, index + length); + } + + // ================================================================= + // equals() and hashCode() + + @Override + public boolean equals(Object other) { + if (other == this) { + return true; + } + if (!(other instanceof ByteString)) { + return false; + } + + if (size() != ((ByteString) other).size()) { + return false; + } + if (size() == 0) { + return true; + } + + if (other instanceof LiteralByteString) { + return equalsRange((LiteralByteString) other, 0, size()); + } else if (other instanceof RopeByteString) { + return other.equals(this); + } else { + throw new IllegalArgumentException( + "Has a new type of ByteString been created? Found " + + other.getClass()); + } + } + + /** + * Check equality of the substring of given length of this object starting at + * zero with another {@code LiteralByteString} substring starting at offset. + * + * @param other what to compare a substring in + * @param offset offset into other + * @param length number of bytes to compare + * @return true for equality of substrings, else false. + */ + boolean equalsRange(LiteralByteString other, int offset, int length) { + if (length > other.size()) { + throw new IllegalArgumentException( + "Length too large: " + length + size()); + } + if (offset + length > other.size()) { + throw new IllegalArgumentException( + "Ran off end of other: " + offset + ", " + length + ", " + + other.size()); + } + + byte[] thisBytes = bytes; + byte[] otherBytes = other.bytes; + int thisLimit = getOffsetIntoBytes() + length; + for (int thisIndex = getOffsetIntoBytes(), otherIndex = + other.getOffsetIntoBytes() + offset; + (thisIndex < thisLimit); ++thisIndex, ++otherIndex) { + if (thisBytes[thisIndex] != otherBytes[otherIndex]) { + return false; + } + } + return true; + } + + /** + * Cached hash value. Intentionally accessed via a data race, which + * is safe because of the Java Memory Model's "no out-of-thin-air values" + * guarantees for ints. + */ + private int hash = 0; + + /** + * Compute the hashCode using the traditional algorithm from {@link + * ByteString}. + * + * @return hashCode value + */ + @Override + public int hashCode() { + int h = hash; + + if (h == 0) { + int size = size(); + h = partialHash(size, 0, size); + if (h == 0) { + h = 1; + } + hash = h; + } + return h; + } + + @Override + protected int peekCachedHashCode() { + return hash; + } + + @Override + protected int partialHash(int h, int offset, int length) { + byte[] thisBytes = bytes; + for (int i = getOffsetIntoBytes() + offset, limit = i + length; i < limit; + i++) { + h = h * 31 + thisBytes[i]; + } + return h; + } + + // ================================================================= + // Input stream + + @Override + public InputStream newInput() { + return new ByteArrayInputStream(bytes, getOffsetIntoBytes(), + size()); // No copy + } + + @Override + public CodedInputStream newCodedInput() { + // We trust CodedInputStream not to modify the bytes, or to give anyone + // else access to them. + return CodedInputStream + .newInstance(bytes, getOffsetIntoBytes(), size()); // No copy + } + + // ================================================================= + // ByteIterator + + @Override + public ByteIterator iterator() { + return new LiteralByteIterator(); + } + + private class LiteralByteIterator implements ByteIterator { + private int position; + private final int limit; + + private LiteralByteIterator() { + position = 0; + limit = size(); + } + + public boolean hasNext() { + return (position < limit); + } + + public Byte next() { + // Boxing calls Byte.valueOf(byte), which does not instantiate. + return nextByte(); + } + + public byte nextByte() { + try { + return bytes[position++]; + } catch (ArrayIndexOutOfBoundsException e) { + throw new NoSuchElementException(e.getMessage()); + } + } + + public void remove() { + throw new UnsupportedOperationException(); + } + } + + // ================================================================= + // Internal methods + + @Override + protected int getTreeDepth() { + return 0; + } + + @Override + protected boolean isBalanced() { + return true; + } + + /** + * Offset into {@code bytes[]} to use, non-zero for substrings. + * + * @return always 0 for this class + */ + protected int getOffsetIntoBytes() { + return 0; + } +} diff --git a/java/src/main/java/com/google/protobuf/Message.java b/java/src/main/java/com/google/protobuf/Message.java index 67c4148e..2b881413 100644 --- a/java/src/main/java/com/google/protobuf/Message.java +++ b/java/src/main/java/com/google/protobuf/Message.java @@ -50,25 +50,28 @@ import java.util.Map; */ public interface Message extends MessageLite, MessageOrBuilder { + // (From MessageLite, re-declared here only for return type covariance.) + Parser<? extends Message> getParserForType(); + // ----------------------------------------------------------------- // Comparison and hashing /** * Compares the specified object with this message for equality. Returns - * <tt>true</tt> if the given object is a message of the same type (as + * {@code true} if the given object is a message of the same type (as * defined by {@code getDescriptorForType()}) and has identical values for * all of its fields. Subclasses must implement this; inheriting * {@code Object.equals()} is incorrect. * * @param other object to be compared for equality with this message - * @return <tt>true</tt> if the specified object is equal to this message + * @return {@code true} if the specified object is equal to this message */ @Override boolean equals(Object other); /** * Returns the hash code value for this message. The hash code of a message - * should mix the message's type (object identity of the decsriptor) with its + * should mix the message's type (object identity of the descriptor) with its * contents (known and unknown field values). Subclasses must implement this; * inheriting {@code Object.hashCode()} is incorrect. * @@ -83,7 +86,8 @@ public interface Message extends MessageLite, MessageOrBuilder { /** * Converts the message to a string in protocol buffer text format. This is - * just a trivial wrapper around {@link TextFormat#printToString(Message)}. + * just a trivial wrapper around {@link + * TextFormat#printToString(MessageOrBuilder)}. */ @Override String toString(); @@ -145,6 +149,24 @@ public interface Message extends MessageLite, MessageOrBuilder { Builder newBuilderForField(Descriptors.FieldDescriptor field); /** + * Get a nested builder instance for the given field. + * <p> + * Normally, we hold a reference to the immutable message object for the + * message type field. Some implementations(the generated message builders), + * however, can also hold a reference to the builder object (a nested + * builder) for the field. + * <p> + * If the field is already backed up by a nested builder, the nested builder + * will be returned. Otherwise, a new field builder will be created and + * returned. The original message field (if exist) will be merged into the + * field builder, which will then be nested into its parent builder. + * <p> + * NOTE: implementations that do not support nested builders will throw + * <code>UnsupportedException</code>. + */ + Builder getFieldBuilder(Descriptors.FieldDescriptor field); + + /** * Sets a field to the given value. The value must be of the correct type * for this field, i.e. the same type that * {@link Message#getField(Descriptors.FieldDescriptor)} would return. diff --git a/java/src/main/java/com/google/protobuf/MessageLite.java b/java/src/main/java/com/google/protobuf/MessageLite.java index 31b8256e..e5b9a47b 100644 --- a/java/src/main/java/com/google/protobuf/MessageLite.java +++ b/java/src/main/java/com/google/protobuf/MessageLite.java @@ -79,6 +79,12 @@ public interface MessageLite extends MessageLiteOrBuilder { */ int getSerializedSize(); + + /** + * Gets the parser for a message of the same type as this message. + */ + Parser<? extends MessageLite> getParserForType(); + // ----------------------------------------------------------------- // Convenience methods. @@ -144,11 +150,8 @@ public interface MessageLite extends MessageLiteOrBuilder { Builder clear(); /** - * Construct the final message. Once this is called, the Builder is no - * longer valid, and calling any other method will result in undefined - * behavior and may throw a NullPointerException. If you need to continue - * working with the builder after calling {@code build()}, {@code clone()} - * it first. + * Constructs the message based on the state of the Builder. Subsequent + * changes to the Builder will not affect the returned message. * @throws UninitializedMessageException The message is missing one or more * required fields (i.e. {@link #isInitialized()} returns false). * Use {@link #buildPartial()} to bypass this check. @@ -158,11 +161,7 @@ public interface MessageLite extends MessageLiteOrBuilder { /** * Like {@link #build()}, but does not throw an exception if the message * is missing required fields. Instead, a partial message is returned. - * Once this is called, the Builder is no longer valid, and calling any - * will result in undefined behavior and may throw a NullPointerException. - * - * If you need to continue working with the builder after calling - * {@code buildPartial()}, {@code clone()} it first. + * Subsequent changes to the Builder will not affect the returned message. */ MessageLite buildPartial(); @@ -174,7 +173,7 @@ public interface MessageLite extends MessageLiteOrBuilder { /** * Parses a message of this type from the input and merges it with this - * message, as if using {@link Builder#mergeFrom(MessageLite)}. + * message. * * <p>Warning: This does not verify that all required fields are present in * the input message. If you call {@link #build()} without setting all @@ -184,11 +183,6 @@ public interface MessageLite extends MessageLiteOrBuilder { * <ul> * <li>Call {@link #isInitialized()} to verify that all required fields * are set before building. - * <li>Parse the message separately using one of the static - * {@code parseFrom} methods, then use {@link #mergeFrom(MessageLite)} - * to merge it with this one. {@code parseFrom} will throw an - * {@link InvalidProtocolBufferException} (an {@code IOException}) - * if some required fields are missing. * <li>Use {@code buildPartial()} to build, which ignores missing * required fields. * </ul> @@ -225,7 +219,7 @@ public interface MessageLite extends MessageLiteOrBuilder { /** * Parse {@code data} as a message of this type and merge it with the * message being built. This is just a small wrapper around - * {@link #mergeFrom(CodedInputStream,ExtensionRegistry)}. + * {@link #mergeFrom(CodedInputStream,ExtensionRegistryLite)}. * * @return this */ @@ -255,7 +249,7 @@ public interface MessageLite extends MessageLiteOrBuilder { /** * Parse {@code data} as a message of this type and merge it with the * message being built. This is just a small wrapper around - * {@link #mergeFrom(CodedInputStream,ExtensionRegistry)}. + * {@link #mergeFrom(CodedInputStream,ExtensionRegistryLite)}. * * @return this */ @@ -266,7 +260,7 @@ public interface MessageLite extends MessageLiteOrBuilder { /** * Parse {@code data} as a message of this type and merge it with the * message being built. This is just a small wrapper around - * {@link #mergeFrom(CodedInputStream,ExtensionRegistry)}. + * {@link #mergeFrom(CodedInputStream,ExtensionRegistryLite)}. * * @return this */ @@ -293,7 +287,7 @@ public interface MessageLite extends MessageLiteOrBuilder { /** * Parse a message of this type from {@code input} and merge it with the * message being built. This is just a small wrapper around - * {@link #mergeFrom(CodedInputStream,ExtensionRegistry)}. + * {@link #mergeFrom(CodedInputStream,ExtensionRegistryLite)}. * * @return this */ @@ -308,9 +302,9 @@ public interface MessageLite extends MessageLiteOrBuilder { * {@link MessageLite#writeDelimitedTo(OutputStream)} to write messages in * this format. * - * @returns True if successful, or false if the stream is at EOF when the - * method starts. Any other error (including reaching EOF during - * parsing) will cause an exception to be thrown. + * @return True if successful, or false if the stream is at EOF when the + * method starts. Any other error (including reaching EOF during + * parsing) will cause an exception to be thrown. */ boolean mergeDelimitedFrom(InputStream input) throws IOException; diff --git a/java/src/main/java/com/google/protobuf/MessageLiteOrBuilder.java b/java/src/main/java/com/google/protobuf/MessageLiteOrBuilder.java index 7cc72e9c..05b2b161 100644 --- a/java/src/main/java/com/google/protobuf/MessageLiteOrBuilder.java +++ b/java/src/main/java/com/google/protobuf/MessageLiteOrBuilder.java @@ -52,6 +52,8 @@ public interface MessageLiteOrBuilder { /** * Returns true if all required fields in the message and all embedded * messages are set, false otherwise. + * + * <p>See also: {@link MessageOrBuilder#getInitializationErrorString()} */ boolean isInitialized(); diff --git a/java/src/main/java/com/google/protobuf/MessageOrBuilder.java b/java/src/main/java/com/google/protobuf/MessageOrBuilder.java index 0132e7ca..bf62d45e 100644 --- a/java/src/main/java/com/google/protobuf/MessageOrBuilder.java +++ b/java/src/main/java/com/google/protobuf/MessageOrBuilder.java @@ -30,6 +30,7 @@ package com.google.protobuf; +import java.util.List; import java.util.Map; /** @@ -45,6 +46,24 @@ public interface MessageOrBuilder extends MessageLiteOrBuilder { Message getDefaultInstanceForType(); /** + * Returns a list of field paths (e.g. "foo.bar.baz") of required fields + * which are not set in this message. You should call + * {@link MessageLiteOrBuilder#isInitialized()} first to check if there + * are any missing fields, as that method is likely to be much faster + * than this one even when the message is fully-initialized. + */ + List<String> findInitializationErrors(); + + /** + * Returns a comma-delimited list of required fields which are not set + * in this message object. You should call + * {@link MessageLiteOrBuilder#isInitialized()} first to check if there + * are any missing fields, as that method is likely to be much faster + * than this one even when the message is fully-initialized. + */ + String getInitializationErrorString(); + + /** * Get the message's type's descriptor. This differs from the * {@code getDescriptor()} method of generated message classes in that * this method is an abstract method of the {@code Message} interface @@ -80,7 +99,7 @@ public interface MessageOrBuilder extends MessageLiteOrBuilder { /** * Obtains the value of the given field, or the default value if it is * not set. For primitive fields, the boxed primitive value is returned. - * For enum fields, the EnumValueDescriptor for the value is returend. For + * For enum fields, the EnumValueDescriptor for the value is returned. For * embedded message fields, the sub-message is returned. For repeated * fields, a java.util.List is returned. */ @@ -98,7 +117,7 @@ public interface MessageOrBuilder extends MessageLiteOrBuilder { /** * Gets an element of a repeated field. For primitive fields, the boxed * primitive value is returned. For enum fields, the EnumValueDescriptor - * for the value is returend. For embedded message fields, the sub-message + * for the value is returned. For embedded message fields, the sub-message * is returned. * @throws IllegalArgumentException The field is not a repeated field, or * {@code field.getContainingType() != getDescriptorForType()}. diff --git a/java/src/main/java/com/google/protobuf/Parser.java b/java/src/main/java/com/google/protobuf/Parser.java new file mode 100644 index 00000000..7d8e8217 --- /dev/null +++ b/java/src/main/java/com/google/protobuf/Parser.java @@ -0,0 +1,259 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// http://code.google.com/p/protobuf/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package com.google.protobuf; + +import java.io.InputStream; + +/** + * Abstract interface for parsing Protocol Messages. + * + * @author liujisi@google.com (Pherl Liu) + */ +public interface Parser<MessageType> { + /** + * Parses a message of {@code MessageType} from the input. + * + * <p>Note: The caller should call + * {@link CodedInputStream#checkLastTagWas(int)} after calling this to + * verify that the last tag seen was the appropriate end-group tag, + * or zero for EOF. + */ + public MessageType parseFrom(CodedInputStream input) + throws InvalidProtocolBufferException; + + /** + * Like {@link #parseFrom(CodedInputStream)}, but also parses extensions. + * The extensions that you want to be able to parse must be registered in + * {@code extensionRegistry}. Extensions not in the registry will be treated + * as unknown fields. + */ + public MessageType parseFrom(CodedInputStream input, + ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException; + + /** + * Like {@link #parseFrom(CodedInputStream)}, but does not throw an + * exception if the message is missing required fields. Instead, a partial + * message is returned. + */ + public MessageType parsePartialFrom(CodedInputStream input) + throws InvalidProtocolBufferException; + + /** + * Like {@link #parseFrom(CodedInputStream input, ExtensionRegistryLite)}, + * but does not throw an exception if the message is missing required fields. + * Instead, a partial message is returned. + */ + public MessageType parsePartialFrom(CodedInputStream input, + ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException; + + // --------------------------------------------------------------- + // Convenience methods. + + /** + * Parses {@code data} as a message of {@code MessageType}. + * This is just a small wrapper around {@link #parseFrom(CodedInputStream)}. + */ + public MessageType parseFrom(ByteString data) + throws InvalidProtocolBufferException; + + /** + * Parses {@code data} as a message of {@code MessageType}. + * This is just a small wrapper around + * {@link #parseFrom(CodedInputStream, ExtensionRegistryLite)}. + */ + public MessageType parseFrom(ByteString data, + ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException; + + /** + * Like {@link #parseFrom(ByteString)}, but does not throw an + * exception if the message is missing required fields. Instead, a partial + * message is returned. + */ + public MessageType parsePartialFrom(ByteString data) + throws InvalidProtocolBufferException; + + /** + * Like {@link #parseFrom(ByteString, ExtensionRegistryLite)}, + * but does not throw an exception if the message is missing required fields. + * Instead, a partial message is returned. + */ + public MessageType parsePartialFrom(ByteString data, + ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException; + + /** + * Parses {@code data} as a message of {@code MessageType}. + * This is just a small wrapper around {@link #parseFrom(CodedInputStream)}. + */ + public MessageType parseFrom(byte[] data, int off, int len) + throws InvalidProtocolBufferException; + + /** + * Parses {@code data} as a message of {@code MessageType}. + * This is just a small wrapper around + * {@link #parseFrom(CodedInputStream, ExtensionRegistryLite)}. + */ + public MessageType parseFrom(byte[] data, int off, int len, + ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException; + + /** + * Parses {@code data} as a message of {@code MessageType}. + * This is just a small wrapper around {@link #parseFrom(CodedInputStream)}. + */ + public MessageType parseFrom(byte[] data) + throws InvalidProtocolBufferException; + + /** + * Parses {@code data} as a message of {@code MessageType}. + * This is just a small wrapper around + * {@link #parseFrom(CodedInputStream, ExtensionRegistryLite)}. + */ + public MessageType parseFrom(byte[] data, + ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException; + + /** + * Like {@link #parseFrom(byte[], int, int)}, but does not throw an + * exception if the message is missing required fields. Instead, a partial + * message is returned. + */ + public MessageType parsePartialFrom(byte[] data, int off, int len) + throws InvalidProtocolBufferException; + + /** + * Like {@link #parseFrom(ByteString, ExtensionRegistryLite)}, + * but does not throw an exception if the message is missing required fields. + * Instead, a partial message is returned. + */ + public MessageType parsePartialFrom(byte[] data, int off, int len, + ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException; + + /** + * Like {@link #parseFrom(byte[])}, but does not throw an + * exception if the message is missing required fields. Instead, a partial + * message is returned. + */ + public MessageType parsePartialFrom(byte[] data) + throws InvalidProtocolBufferException; + + /** + * Like {@link #parseFrom(byte[], ExtensionRegistryLite)}, + * but does not throw an exception if the message is missing required fields. + * Instead, a partial message is returned. + */ + public MessageType parsePartialFrom(byte[] data, + ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException; + + /** + * Parse a message of {@code MessageType} from {@code input}. + * This is just a small wrapper around {@link #parseFrom(CodedInputStream)}. + * Note that this method always reads the <i>entire</i> input (unless it + * throws an exception). If you want it to stop earlier, you will need to + * wrap your input in some wrapper stream that limits reading. Or, use + * {@link MessageLite#writeDelimitedTo(java.io.OutputStream)} to write your + * message and {@link #parseDelimitedFrom(InputStream)} to read it. + * <p> + * Despite usually reading the entire input, this does not close the stream. + */ + public MessageType parseFrom(InputStream input) + throws InvalidProtocolBufferException; + + /** + * Parses a message of {@code MessageType} from {@code input}. + * This is just a small wrapper around + * {@link #parseFrom(CodedInputStream, ExtensionRegistryLite)}. + */ + public MessageType parseFrom(InputStream input, + ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException; + + /** + * Like {@link #parseFrom(InputStream)}, but does not throw an + * exception if the message is missing required fields. Instead, a partial + * message is returned. + */ + public MessageType parsePartialFrom(InputStream input) + throws InvalidProtocolBufferException; + + /** + * Like {@link #parseFrom(InputStream, ExtensionRegistryLite)}, + * but does not throw an exception if the message is missing required fields. + * Instead, a partial message is returned. + */ + public MessageType parsePartialFrom(InputStream input, + ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException; + + /** + * Like {@link #parseFrom(InputStream)}, but does not read util EOF. + * Instead, the size of message (encoded as a varint) is read first, + * then the message data. Use + * {@link MessageLite#writeDelimitedTo(java.io.OutputStream)} to write + * messages in this format. + * + * @return True if successful, or false if the stream is at EOF when the + * method starts. Any other error (including reaching EOF during + * parsing) will cause an exception to be thrown. + */ + public MessageType parseDelimitedFrom(InputStream input) + throws InvalidProtocolBufferException; + + /** + * Like {@link #parseDelimitedFrom(InputStream)} but supporting extensions. + */ + public MessageType parseDelimitedFrom(InputStream input, + ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException; + + /** + * Like {@link #parseDelimitedFrom(InputStream)}, but does not throw an + * exception if the message is missing required fields. Instead, a partial + * message is returned. + */ + public MessageType parsePartialDelimitedFrom(InputStream input) + throws InvalidProtocolBufferException; + + /** + * Like {@link #parseDelimitedFrom(InputStream, ExtensionRegistryLite)}, + * but does not throw an exception if the message is missing required fields. + * Instead, a partial message is returned. + */ + public MessageType parsePartialDelimitedFrom( + InputStream input, + ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException; +} diff --git a/java/src/main/java/com/google/protobuf/RepeatedFieldBuilder.java b/java/src/main/java/com/google/protobuf/RepeatedFieldBuilder.java index 0024f791..65d9270d 100644 --- a/java/src/main/java/com/google/protobuf/RepeatedFieldBuilder.java +++ b/java/src/main/java/com/google/protobuf/RepeatedFieldBuilder.java @@ -37,22 +37,22 @@ import java.util.Collections; import java.util.List; /** - * <code>RepeatedFieldBuilder</code> implements a structure that a protocol + * {@code RepeatedFieldBuilder} implements a structure that a protocol * message uses to hold a repeated field of other protocol messages. It supports * the classical use case of adding immutable {@link Message}'s to the * repeated field and is highly optimized around this (no extra memory * allocations and sharing of immutable arrays). * <br> * It also supports the additional use case of adding a {@link Message.Builder} - * to the repeated field and deferring conversion of that <code>Builder</code> - * to an immutable <code>Message</code>. In this way, it's possible to maintain - * a tree of <code>Builder</code>'s that acts as a fully read/write data + * to the repeated field and deferring conversion of that {@code Builder} + * to an immutable {@code Message}. In this way, it's possible to maintain + * a tree of {@code Builder}'s that acts as a fully read/write data * structure. * <br> * Logically, one can think of a tree of builders as converting the entire tree * to messages when build is called on the root or when any method is called * that desires a Message instead of a Builder. In terms of the implementation, - * the <code>SingleFieldBuilder</code> and <code>RepeatedFieldBuilder</code> + * the {@code SingleFieldBuilder} and {@code RepeatedFieldBuilder} * classes cache messages that were created so that messages only need to be * created when some change occured in its builder or a builder for one of its * descendants. @@ -192,7 +192,7 @@ public class RepeatedFieldBuilder /** * Get the message at the specified index. If the message is currently stored - * as a <code>Builder</code>, it is converted to a <code>Message</code> by + * as a {@code Builder}, it is converted to a {@code Message} by * calling {@link Message.Builder#buildPartial} on it. * * @param index the index of the message to get @@ -204,7 +204,7 @@ public class RepeatedFieldBuilder /** * Get the message at the specified index. If the message is currently stored - * as a <code>Builder</code>, it is converted to a <code>Message</code> by + * as a {@code Builder}, it is converted to a {@code Message} by * calling {@link Message.Builder#buildPartial} on it. * * @param index the index of the message to get diff --git a/java/src/main/java/com/google/protobuf/RopeByteString.java b/java/src/main/java/com/google/protobuf/RopeByteString.java new file mode 100644 index 00000000..8d44d117 --- /dev/null +++ b/java/src/main/java/com/google/protobuf/RopeByteString.java @@ -0,0 +1,945 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// http://code.google.com/p/protobuf/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package com.google.protobuf; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.UnsupportedEncodingException; +import java.io.ByteArrayInputStream; +import java.nio.ByteBuffer; +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Deque; +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; + +/** + * Class to represent {@code ByteStrings} formed by concatenation of other + * ByteStrings, without copying the data in the pieces. The concatenation is + * represented as a tree whose leaf nodes are each a {@link LiteralByteString}. + * + * <p>Most of the operation here is inspired by the now-famous paper <a + * href="http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf"> + * BAP95 </a> Ropes: an Alternative to Strings hans-j. boehm, russ atkinson and + * michael plass + * + * <p>The algorithms described in the paper have been implemented for character + * strings in {@link com.google.common.string.Rope} and in the c++ class {@code + * cord.cc}. + * + * <p>Fundamentally the Rope algorithm represents the collection of pieces as a + * binary tree. BAP95 uses a Fibonacci bound relating depth to a minimum + * sequence length, sequences that are too short relative to their depth cause a + * tree rebalance. More precisely, a tree of depth d is "balanced" in the + * terminology of BAP95 if its length is at least F(d+2), where F(n) is the + * n-the Fibonacci number. Thus for depths 0, 1, 2, 3, 4, 5,... we have minimum + * lengths 1, 2, 3, 5, 8, 13,... + * + * @author carlanton@google.com (Carl Haverl) + */ +class RopeByteString extends ByteString { + + /** + * BAP95. Let Fn be the nth Fibonacci number. A {@link RopeByteString} of + * depth n is "balanced", i.e flat enough, if its length is at least Fn+2, + * e.g. a "balanced" {@link RopeByteString} of depth 1 must have length at + * least 2, of depth 4 must have length >= 8, etc. + * + * <p>There's nothing special about using the Fibonacci numbers for this, but + * they are a reasonable sequence for encapsulating the idea that we are OK + * with longer strings being encoded in deeper binary trees. + * + * <p>For 32-bit integers, this array has length 46. + */ + private static final int[] minLengthByDepth; + + static { + // Dynamically generate the list of Fibonacci numbers the first time this + // class is accessed. + List<Integer> numbers = new ArrayList<Integer>(); + + // we skip the first Fibonacci number (1). So instead of: 1 1 2 3 5 8 ... + // we have: 1 2 3 5 8 ... + int f1 = 1; + int f2 = 1; + + // get all the values until we roll over. + while (f2 > 0) { + numbers.add(f2); + int temp = f1 + f2; + f1 = f2; + f2 = temp; + } + + // we include this here so that we can index this array to [x + 1] in the + // loops below. + numbers.add(Integer.MAX_VALUE); + minLengthByDepth = new int[numbers.size()]; + for (int i = 0; i < minLengthByDepth.length; i++) { + // unbox all the values + minLengthByDepth[i] = numbers.get(i); + } + } + + private final int totalLength; + private final ByteString left; + private final ByteString right; + private final int leftLength; + private final int treeDepth; + + /** + * Create a new RopeByteString, which can be thought of as a new tree node, by + * recording references to the two given strings. + * + * @param left string on the left of this node, should have {@code size() > + * 0} + * @param right string on the right of this node, should have {@code size() > + * 0} + */ + private RopeByteString(ByteString left, ByteString right) { + this.left = left; + this.right = right; + leftLength = left.size(); + totalLength = leftLength + right.size(); + treeDepth = Math.max(left.getTreeDepth(), right.getTreeDepth()) + 1; + } + + /** + * Concatenate the given strings while performing various optimizations to + * slow the growth rate of tree depth and tree node count. The result is + * either a {@link LiteralByteString} or a {@link RopeByteString} + * depending on which optimizations, if any, were applied. + * + * <p>Small pieces of length less than {@link + * ByteString#CONCATENATE_BY_COPY_SIZE} may be copied by value here, as in + * BAP95. Large pieces are referenced without copy. + * + * @param left string on the left + * @param right string on the right + * @return concatenation representing the same sequence as the given strings + */ + static ByteString concatenate(ByteString left, ByteString right) { + ByteString result; + RopeByteString leftRope = + (left instanceof RopeByteString) ? (RopeByteString) left : null; + if (right.size() == 0) { + result = left; + } else if (left.size() == 0) { + result = right; + } else { + int newLength = left.size() + right.size(); + if (newLength < ByteString.CONCATENATE_BY_COPY_SIZE) { + // Optimization from BAP95: For short (leaves in paper, but just short + // here) total length, do a copy of data to a new leaf. + result = concatenateBytes(left, right); + } else if (leftRope != null + && leftRope.right.size() + right.size() < CONCATENATE_BY_COPY_SIZE) { + // Optimization from BAP95: As an optimization of the case where the + // ByteString is constructed by repeated concatenate, recognize the case + // where a short string is concatenated to a left-hand node whose + // right-hand branch is short. In the paper this applies to leaves, but + // we just look at the length here. This has the advantage of shedding + // references to unneeded data when substrings have been taken. + // + // When we recognize this case, we do a copy of the data and create a + // new parent node so that the depth of the result is the same as the + // given left tree. + ByteString newRight = concatenateBytes(leftRope.right, right); + result = new RopeByteString(leftRope.left, newRight); + } else if (leftRope != null + && leftRope.left.getTreeDepth() > leftRope.right.getTreeDepth() + && leftRope.getTreeDepth() > right.getTreeDepth()) { + // Typically for concatenate-built strings the left-side is deeper than + // the right. This is our final attempt to concatenate without + // increasing the tree depth. We'll redo the the node on the RHS. This + // is yet another optimization for building the string by repeatedly + // concatenating on the right. + ByteString newRight = new RopeByteString(leftRope.right, right); + result = new RopeByteString(leftRope.left, newRight); + } else { + // Fine, we'll add a node and increase the tree depth--unless we + // rebalance ;^) + int newDepth = Math.max(left.getTreeDepth(), right.getTreeDepth()) + 1; + if (newLength >= minLengthByDepth[newDepth]) { + // The tree is shallow enough, so don't rebalance + result = new RopeByteString(left, right); + } else { + result = new Balancer().balance(left, right); + } + } + } + return result; + } + + /** + * Concatenates two strings by copying data values. This is called in a few + * cases in order to reduce the growth of the number of tree nodes. + * + * @param left string on the left + * @param right string on the right + * @return string formed by copying data bytes + */ + private static LiteralByteString concatenateBytes(ByteString left, + ByteString right) { + int leftSize = left.size(); + int rightSize = right.size(); + byte[] bytes = new byte[leftSize + rightSize]; + left.copyTo(bytes, 0, 0, leftSize); + right.copyTo(bytes, 0, leftSize, rightSize); + return new LiteralByteString(bytes); // Constructor wraps bytes + } + + /** + * Create a new RopeByteString for testing only while bypassing all the + * defenses of {@link #concatenate(ByteString, ByteString)}. This allows + * testing trees of specific structure. We are also able to insert empty + * leaves, though these are dis-allowed, so that we can make sure the + * implementation can withstand their presence. + * + * @param left string on the left of this node + * @param right string on the right of this node + * @return an unsafe instance for testing only + */ + static RopeByteString newInstanceForTest(ByteString left, ByteString right) { + return new RopeByteString(left, right); + } + + /** + * Gets the byte at the given index. + * Throws {@link ArrayIndexOutOfBoundsException} for backwards-compatibility + * reasons although it would more properly be {@link + * IndexOutOfBoundsException}. + * + * @param index index of byte + * @return the value + * @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size + */ + @Override + public byte byteAt(int index) { + if (index < 0) { + throw new ArrayIndexOutOfBoundsException("Index < 0: " + index); + } + if (index > totalLength) { + throw new ArrayIndexOutOfBoundsException( + "Index > length: " + index + ", " + totalLength); + } + + byte result; + // Find the relevant piece by recursive descent + if (index < leftLength) { + result = left.byteAt(index); + } else { + result = right.byteAt(index - leftLength); + } + return result; + } + + @Override + public int size() { + return totalLength; + } + + // ================================================================= + // Pieces + + @Override + protected int getTreeDepth() { + return treeDepth; + } + + /** + * Determines if the tree is balanced according to BAP95, which means the tree + * is flat-enough with respect to the bounds. Note that this definition of + * balanced is one where sub-trees of balanced trees are not necessarily + * balanced. + * + * @return true if the tree is balanced + */ + @Override + protected boolean isBalanced() { + return totalLength >= minLengthByDepth[treeDepth]; + } + + /** + * Takes a substring of this one. This involves recursive descent along the + * left and right edges of the substring, and referencing any wholly contained + * segments in between. Any leaf nodes entirely uninvolved in the substring + * will not be referenced by the substring. + * + * <p>Substrings of {@code length < 2} should result in at most a single + * recursive call chain, terminating at a leaf node. Thus the result will be a + * {@link LiteralByteString}. {@link #RopeByteString(ByteString, + * ByteString)}. + * + * @param beginIndex start at this index + * @param endIndex the last character is the one before this index + * @return substring leaf node or tree + */ + @Override + public ByteString substring(int beginIndex, int endIndex) { + if (beginIndex < 0) { + throw new IndexOutOfBoundsException( + "Beginning index: " + beginIndex + " < 0"); + } + if (endIndex > totalLength) { + throw new IndexOutOfBoundsException( + "End index: " + endIndex + " > " + totalLength); + } + int substringLength = endIndex - beginIndex; + if (substringLength < 0) { + throw new IndexOutOfBoundsException( + "Beginning index larger than ending index: " + beginIndex + ", " + + endIndex); + } + + ByteString result; + if (substringLength == 0) { + // Empty substring + result = ByteString.EMPTY; + } else if (substringLength == totalLength) { + // The whole string + result = this; + } else { + // Proper substring + if (endIndex <= leftLength) { + // Substring on the left + result = left.substring(beginIndex, endIndex); + } else if (beginIndex >= leftLength) { + // Substring on the right + result = right + .substring(beginIndex - leftLength, endIndex - leftLength); + } else { + // Split substring + ByteString leftSub = left.substring(beginIndex); + ByteString rightSub = right.substring(0, endIndex - leftLength); + // Intentionally not rebalancing, since in many cases these two + // substrings will already be less deep than the top-level + // RopeByteString we're taking a substring of. + result = new RopeByteString(leftSub, rightSub); + } + } + return result; + } + + // ================================================================= + // ByteString -> byte[] + + @Override + protected void copyToInternal(byte[] target, int sourceOffset, + int targetOffset, int numberToCopy) { + if (sourceOffset + numberToCopy <= leftLength) { + left.copyToInternal(target, sourceOffset, targetOffset, numberToCopy); + } else if (sourceOffset >= leftLength) { + right.copyToInternal(target, sourceOffset - leftLength, targetOffset, + numberToCopy); + } else { + int leftLength = this.leftLength - sourceOffset; + left.copyToInternal(target, sourceOffset, targetOffset, leftLength); + right.copyToInternal(target, 0, targetOffset + leftLength, + numberToCopy - leftLength); + } + } + + @Override + public void copyTo(ByteBuffer target) { + left.copyTo(target); + right.copyTo(target); + } + + @Override + public ByteBuffer asReadOnlyByteBuffer() { + ByteBuffer byteBuffer = ByteBuffer.wrap(toByteArray()); + return byteBuffer.asReadOnlyBuffer(); + } + + @Override + public List<ByteBuffer> asReadOnlyByteBufferList() { + // Walk through the list of LiteralByteString's that make up this + // rope, and add each one as a read-only ByteBuffer. + List<ByteBuffer> result = new ArrayList<ByteBuffer>(); + PieceIterator pieces = new PieceIterator(this); + while (pieces.hasNext()) { + LiteralByteString byteString = pieces.next(); + result.add(byteString.asReadOnlyByteBuffer()); + } + return result; + } + + @Override + public void writeTo(OutputStream outputStream) throws IOException { + left.writeTo(outputStream); + right.writeTo(outputStream); + } + + @Override + public String toString(String charsetName) + throws UnsupportedEncodingException { + return new String(toByteArray(), charsetName); + } + + // ================================================================= + // UTF-8 decoding + + @Override + public boolean isValidUtf8() { + int leftPartial = left.partialIsValidUtf8(Utf8.COMPLETE, 0, leftLength); + int state = right.partialIsValidUtf8(leftPartial, 0, right.size()); + return state == Utf8.COMPLETE; + } + + @Override + protected int partialIsValidUtf8(int state, int offset, int length) { + int toIndex = offset + length; + if (toIndex <= leftLength) { + return left.partialIsValidUtf8(state, offset, length); + } else if (offset >= leftLength) { + return right.partialIsValidUtf8(state, offset - leftLength, length); + } else { + int leftLength = this.leftLength - offset; + int leftPartial = left.partialIsValidUtf8(state, offset, leftLength); + return right.partialIsValidUtf8(leftPartial, 0, length - leftLength); + } + } + + // ================================================================= + // equals() and hashCode() + + @Override + public boolean equals(Object other) { + if (other == this) { + return true; + } + if (!(other instanceof ByteString)) { + return false; + } + + ByteString otherByteString = (ByteString) other; + if (totalLength != otherByteString.size()) { + return false; + } + if (totalLength == 0) { + return true; + } + + // You don't really want to be calling equals on long strings, but since + // we cache the hashCode, we effectively cache inequality. We use the cached + // hashCode if it's already computed. It's arguable we should compute the + // hashCode here, and if we're going to be testing a bunch of byteStrings, + // it might even make sense. + if (hash != 0) { + int cachedOtherHash = otherByteString.peekCachedHashCode(); + if (cachedOtherHash != 0 && hash != cachedOtherHash) { + return false; + } + } + + return equalsFragments(otherByteString); + } + + /** + * Determines if this string is equal to another of the same length by + * iterating over the leaf nodes. On each step of the iteration, the + * overlapping segments of the leaves are compared. + * + * @param other string of the same length as this one + * @return true if the values of this string equals the value of the given + * one + */ + private boolean equalsFragments(ByteString other) { + int thisOffset = 0; + Iterator<LiteralByteString> thisIter = new PieceIterator(this); + LiteralByteString thisString = thisIter.next(); + + int thatOffset = 0; + Iterator<LiteralByteString> thatIter = new PieceIterator(other); + LiteralByteString thatString = thatIter.next(); + + int pos = 0; + while (true) { + int thisRemaining = thisString.size() - thisOffset; + int thatRemaining = thatString.size() - thatOffset; + int bytesToCompare = Math.min(thisRemaining, thatRemaining); + + // At least one of the offsets will be zero + boolean stillEqual = (thisOffset == 0) + ? thisString.equalsRange(thatString, thatOffset, bytesToCompare) + : thatString.equalsRange(thisString, thisOffset, bytesToCompare); + if (!stillEqual) { + return false; + } + + pos += bytesToCompare; + if (pos >= totalLength) { + if (pos == totalLength) { + return true; + } + throw new IllegalStateException(); + } + // We always get to the end of at least one of the pieces + if (bytesToCompare == thisRemaining) { // If reached end of this + thisOffset = 0; + thisString = thisIter.next(); + } else { + thisOffset += bytesToCompare; + } + if (bytesToCompare == thatRemaining) { // If reached end of that + thatOffset = 0; + thatString = thatIter.next(); + } else { + thatOffset += bytesToCompare; + } + } + } + + /** + * Cached hash value. Intentionally accessed via a data race, which is safe + * because of the Java Memory Model's "no out-of-thin-air values" guarantees + * for ints. + */ + private int hash = 0; + + @Override + public int hashCode() { + int h = hash; + + if (h == 0) { + h = totalLength; + h = partialHash(h, 0, totalLength); + if (h == 0) { + h = 1; + } + hash = h; + } + return h; + } + + @Override + protected int peekCachedHashCode() { + return hash; + } + + @Override + protected int partialHash(int h, int offset, int length) { + int toIndex = offset + length; + if (toIndex <= leftLength) { + return left.partialHash(h, offset, length); + } else if (offset >= leftLength) { + return right.partialHash(h, offset - leftLength, length); + } else { + int leftLength = this.leftLength - offset; + int leftPartial = left.partialHash(h, offset, leftLength); + return right.partialHash(leftPartial, 0, length - leftLength); + } + } + + // ================================================================= + // Input stream + + @Override + public CodedInputStream newCodedInput() { + return CodedInputStream.newInstance(new RopeInputStream()); + } + + @Override + public InputStream newInput() { + return new RopeInputStream(); + } + + /** + * This class implements the balancing algorithm of BAP95. In the paper the + * authors use an array to keep track of pieces, while here we use a stack. + * The tree is balanced by traversing subtrees in left to right order, and the + * stack always contains the part of the string we've traversed so far. + * + * <p>One surprising aspect of the algorithm is the result of balancing is not + * necessarily balanced, though it is nearly balanced. For details, see + * BAP95. + */ + private static class Balancer { + // Stack containing the part of the string, starting from the left, that + // we've already traversed. The final string should be the equivalent of + // concatenating the strings on the stack from bottom to top. + private final Deque<ByteString> prefixesStack = + new ArrayDeque<ByteString>(minLengthByDepth.length); + + private ByteString balance(ByteString left, ByteString right) { + doBalance(left); + doBalance(right); + + // Sweep stack to gather the result + ByteString partialString = prefixesStack.pop(); + while (!prefixesStack.isEmpty()) { + ByteString newLeft = prefixesStack.pop(); + partialString = new RopeByteString(newLeft, partialString); + } + // We should end up with a RopeByteString since at a minimum we will + // create one from concatenating left and right + return partialString; + } + + private void doBalance(ByteString root) { + // BAP95: Insert balanced subtrees whole. This means the result might not + // be balanced, leading to repeated rebalancings on concatenate. However, + // these rebalancings are shallow due to ignoring balanced subtrees, and + // relatively few calls to insert() result. + if (root.isBalanced()) { + insert(root); + } else if (root instanceof RopeByteString) { + RopeByteString rbs = (RopeByteString) root; + doBalance(rbs.left); + doBalance(rbs.right); + } else { + throw new IllegalArgumentException( + "Has a new type of ByteString been created? Found " + + root.getClass()); + } + } + + /** + * Push a string on the balance stack (BAP95). BAP95 uses an array and + * calls the elements in the array 'bins'. We instead use a stack, so the + * 'bins' of lengths are represented by differences between the elements of + * minLengthByDepth. + * + * <p>If the length bin for our string, and all shorter length bins, are + * empty, we just push it on the stack. Otherwise, we need to start + * concatenating, putting the given string in the "middle" and continuing + * until we land in an empty length bin that matches the length of our + * concatenation. + * + * @param byteString string to place on the balance stack + */ + private void insert(ByteString byteString) { + int depthBin = getDepthBinForLength(byteString.size()); + int binEnd = minLengthByDepth[depthBin + 1]; + + // BAP95: Concatenate all trees occupying bins representing the length of + // our new piece or of shorter pieces, to the extent that is possible. + // The goal is to clear the bin which our piece belongs in, but that may + // not be entirely possible if there aren't enough longer bins occupied. + if (prefixesStack.isEmpty() || prefixesStack.peek().size() >= binEnd) { + prefixesStack.push(byteString); + } else { + int binStart = minLengthByDepth[depthBin]; + + // Concatenate the subtrees of shorter length + ByteString newTree = prefixesStack.pop(); + while (!prefixesStack.isEmpty() + && prefixesStack.peek().size() < binStart) { + ByteString left = prefixesStack.pop(); + newTree = new RopeByteString(left, newTree); + } + + // Concatenate the given string + newTree = new RopeByteString(newTree, byteString); + + // Continue concatenating until we land in an empty bin + while (!prefixesStack.isEmpty()) { + depthBin = getDepthBinForLength(newTree.size()); + binEnd = minLengthByDepth[depthBin + 1]; + if (prefixesStack.peek().size() < binEnd) { + ByteString left = prefixesStack.pop(); + newTree = new RopeByteString(left, newTree); + } else { + break; + } + } + prefixesStack.push(newTree); + } + } + + private int getDepthBinForLength(int length) { + int depth = Arrays.binarySearch(minLengthByDepth, length); + if (depth < 0) { + // It wasn't an exact match, so convert to the index of the containing + // fragment, which is one less even than the insertion point. + int insertionPoint = -(depth + 1); + depth = insertionPoint - 1; + } + + return depth; + } + } + + /** + * This class is a continuable tree traversal, which keeps the state + * information which would exist on the stack in a recursive traversal instead + * on a stack of "Bread Crumbs". The maximum depth of the stack in this + * iterator is the same as the depth of the tree being traversed. + * + * <p>This iterator is used to implement + * {@link RopeByteString#equalsFragments(ByteString)}. + */ + private static class PieceIterator implements Iterator<LiteralByteString> { + + private final Deque<RopeByteString> breadCrumbs = + new ArrayDeque<RopeByteString>(minLengthByDepth.length); + private LiteralByteString next; + + private PieceIterator(ByteString root) { + next = getLeafByLeft(root); + } + + private LiteralByteString getLeafByLeft(ByteString root) { + ByteString pos = root; + while (pos instanceof RopeByteString) { + RopeByteString rbs = (RopeByteString) pos; + breadCrumbs.push(rbs); + pos = rbs.left; + } + return (LiteralByteString) pos; + } + + private LiteralByteString getNextNonEmptyLeaf() { + while (true) { + // Almost always, we go through this loop exactly once. However, if + // we discover an empty string in the rope, we toss it and try again. + if (breadCrumbs.isEmpty()) { + return null; + } else { + LiteralByteString result = getLeafByLeft(breadCrumbs.pop().right); + if (!result.isEmpty()) { + return result; + } + } + } + } + + public boolean hasNext() { + return next != null; + } + + /** + * Returns the next item and advances one {@code LiteralByteString}. + * + * @return next non-empty LiteralByteString or {@code null} + */ + public LiteralByteString next() { + if (next == null) { + throw new NoSuchElementException(); + } + LiteralByteString result = next; + next = getNextNonEmptyLeaf(); + return result; + } + + public void remove() { + throw new UnsupportedOperationException(); + } + } + + // ================================================================= + // ByteIterator + + @Override + public ByteIterator iterator() { + return new RopeByteIterator(); + } + + private class RopeByteIterator implements ByteString.ByteIterator { + + private final PieceIterator pieces; + private ByteIterator bytes; + int bytesRemaining; + + private RopeByteIterator() { + pieces = new PieceIterator(RopeByteString.this); + bytes = pieces.next().iterator(); + bytesRemaining = size(); + } + + public boolean hasNext() { + return (bytesRemaining > 0); + } + + public Byte next() { + return nextByte(); // Does not instantiate a Byte + } + + public byte nextByte() { + if (!bytes.hasNext()) { + bytes = pieces.next().iterator(); + } + --bytesRemaining; + return bytes.nextByte(); + } + + public void remove() { + throw new UnsupportedOperationException(); + } + } + + /** + * This class is the {@link RopeByteString} equivalent for + * {@link ByteArrayInputStream}. + */ + private class RopeInputStream extends InputStream { + // Iterates through the pieces of the rope + private PieceIterator pieceIterator; + // The current piece + private LiteralByteString currentPiece; + // The size of the current piece + private int currentPieceSize; + // The index of the next byte to read in the current piece + private int currentPieceIndex; + // The offset of the start of the current piece in the rope byte string + private int currentPieceOffsetInRope; + // Offset in the buffer at which user called mark(); + private int mark; + + public RopeInputStream() { + initialize(); + } + + @Override + public int read(byte b[], int offset, int length) { + if (b == null) { + throw new NullPointerException(); + } else if (offset < 0 || length < 0 || length > b.length - offset) { + throw new IndexOutOfBoundsException(); + } + return readSkipInternal(b, offset, length); + } + + @Override + public long skip(long length) { + if (length < 0) { + throw new IndexOutOfBoundsException(); + } else if (length > Integer.MAX_VALUE) { + length = Integer.MAX_VALUE; + } + return readSkipInternal(null, 0, (int) length); + } + + /** + * Internal implementation of read and skip. If b != null, then read the + * next {@code length} bytes into the buffer {@code b} at + * offset {@code offset}. If b == null, then skip the next {@code length) + * bytes. + * <p> + * This method assumes that all error checking has already happened. + * <p> + * Returns the actual number of bytes read or skipped. + */ + private int readSkipInternal(byte b[], int offset, int length) { + int bytesRemaining = length; + while (bytesRemaining > 0) { + advanceIfCurrentPieceFullyRead(); + if (currentPiece == null) { + if (bytesRemaining == length) { + // We didn't manage to read anything + return -1; + } + break; + } else { + // Copy the bytes from this piece. + int currentPieceRemaining = currentPieceSize - currentPieceIndex; + int count = Math.min(currentPieceRemaining, bytesRemaining); + if (b != null) { + currentPiece.copyTo(b, currentPieceIndex, offset, count); + offset += count; + } + currentPieceIndex += count; + bytesRemaining -= count; + } + } + // Return the number of bytes read. + return length - bytesRemaining; + } + + @Override + public int read() throws IOException { + advanceIfCurrentPieceFullyRead(); + if (currentPiece == null) { + return -1; + } else { + return currentPiece.byteAt(currentPieceIndex++) & 0xFF; + } + } + + @Override + public int available() throws IOException { + int bytesRead = currentPieceOffsetInRope + currentPieceIndex; + return RopeByteString.this.size() - bytesRead; + } + + @Override + public boolean markSupported() { + return true; + } + + @Override + public void mark(int readAheadLimit) { + // Set the mark to our position in the byte string + mark = currentPieceOffsetInRope + currentPieceIndex; + } + + @Override + public synchronized void reset() { + // Just reinitialize and skip the specified number of bytes. + initialize(); + readSkipInternal(null, 0, mark); + } + + /** Common initialization code used by both the constructor and reset() */ + private void initialize() { + pieceIterator = new PieceIterator(RopeByteString.this); + currentPiece = pieceIterator.next(); + currentPieceSize = currentPiece.size(); + currentPieceIndex = 0; + currentPieceOffsetInRope = 0; + } + + /** + * Skips to the next piece if we have read all the data in the current + * piece. Sets currentPiece to null if we have reached the end of the + * input. + */ + private void advanceIfCurrentPieceFullyRead() { + if (currentPiece != null && currentPieceIndex == currentPieceSize) { + // Generally, we can only go through this loop at most once, since + // empty strings can't end up in a rope. But better to test. + currentPieceOffsetInRope += currentPieceSize; + currentPieceIndex = 0; + if (pieceIterator.hasNext()) { + currentPiece = pieceIterator.next(); + currentPieceSize = currentPiece.size(); + } else { + currentPiece = null; + currentPieceSize = 0; + } + } + } + } +} diff --git a/java/src/main/java/com/google/protobuf/SingleFieldBuilder.java b/java/src/main/java/com/google/protobuf/SingleFieldBuilder.java index d4475f66..4bfc9f34 100644 --- a/java/src/main/java/com/google/protobuf/SingleFieldBuilder.java +++ b/java/src/main/java/com/google/protobuf/SingleFieldBuilder.java @@ -31,21 +31,21 @@ package com.google.protobuf; /** - * <code>SingleFieldBuilder</code> implements a structure that a protocol + * {@code SingleFieldBuilder} implements a structure that a protocol * message uses to hold a single field of another protocol message. It supports * the classical use case of setting an immutable {@link Message} as the value * of the field and is highly optimized around this. * <br> * It also supports the additional use case of setting a {@link Message.Builder} - * as the field and deferring conversion of that <code>Builder</code> - * to an immutable <code>Message</code>. In this way, it's possible to maintain - * a tree of <code>Builder</code>'s that acts as a fully read/write data + * as the field and deferring conversion of that {@code Builder} + * to an immutable {@code Message}. In this way, it's possible to maintain + * a tree of {@code Builder}'s that acts as a fully read/write data * structure. * <br> * Logically, one can think of a tree of builders as converting the entire tree * to messages when build is called on the root or when any method is called * that desires a Message instead of a Builder. In terms of the implementation, - * the <code>SingleFieldBuilder</code> and <code>RepeatedFieldBuilder</code> + * the {@code SingleFieldBuilder} and {@code RepeatedFieldBuilder} * classes cache messages that were created so that messages only need to be * created when some change occured in its builder or a builder for one of its * descendants. @@ -99,7 +99,7 @@ public class SingleFieldBuilder /** * Get the message for the field. If the message is currently stored - * as a <code>Builder</code>, it is converted to a <code>Message</code> by + * as a {@code Builder}, it is converted to a {@code Message} by * calling {@link Message.Builder#buildPartial} on it. If no message has * been set, returns the default instance of the message. * diff --git a/java/src/main/java/com/google/protobuf/SmallSortedMap.java b/java/src/main/java/com/google/protobuf/SmallSortedMap.java index 1cf270f3..c6cad6af 100644 --- a/java/src/main/java/com/google/protobuf/SmallSortedMap.java +++ b/java/src/main/java/com/google/protobuf/SmallSortedMap.java @@ -51,14 +51,14 @@ import java.util.SortedMap; * remaining entries are stored in an overflow map. Iteration over the entries * in the map should be done as follows: * - * <pre> - * for (int i = 0; i < fieldMap.getNumArrayEntries(); i++) { + * <pre> {@code + * for (int i = 0; i < fieldMap.getNumArrayEntries(); i++) { * process(fieldMap.getArrayEntryAt(i)); * } - * for (Map.Entry<K, V> entry : fieldMap.getOverflowEntries()) { + * for (Map.Entry<K, V> entry : fieldMap.getOverflowEntries()) { * process(entry); * } - * </pre> + * }</pre> * * The resulting iteration is in order of ascending field tag number. The * object returned by {@link #entrySet()} adheres to the same contract but is @@ -394,7 +394,7 @@ class SmallSortedMap<K extends Comparable<K>, V> extends AbstractMap<K, V> { /** * Entry implementation that implements Comparable in order to support - * binary search witin the entry array. Also checks mutability in + * binary search within the entry array. Also checks mutability in * {@link #setValue()}. */ private class Entry implements Map.Entry<K, V>, Comparable<Entry> { diff --git a/java/src/main/java/com/google/protobuf/TextFormat.java b/java/src/main/java/com/google/protobuf/TextFormat.java index d5fbdabf..ed462899 100644 --- a/java/src/main/java/com/google/protobuf/TextFormat.java +++ b/java/src/main/java/com/google/protobuf/TextFormat.java @@ -55,15 +55,18 @@ import java.util.regex.Pattern; public final class TextFormat { private TextFormat() {} - private static final Printer DEFAULT_PRINTER = new Printer(false); - private static final Printer SINGLE_LINE_PRINTER = new Printer(true); + private static final Printer DEFAULT_PRINTER = new Printer(); + private static final Printer SINGLE_LINE_PRINTER = + (new Printer()).setSingleLineMode(true); + private static final Printer UNICODE_PRINTER = + (new Printer()).setEscapeNonAscii(false); /** * Outputs a textual representation of the Protocol Message supplied into * the parameter output. (This representation is the new version of the * classic "ProtocolPrinter" output from the original Protocol Buffer system) */ - public static void print(final Message message, final Appendable output) + public static void print(final MessageOrBuilder message, final Appendable output) throws IOException { DEFAULT_PRINTER.print(message, new TextGenerator(output)); } @@ -79,7 +82,7 @@ public final class TextFormat { * Generates a human readable form of this message, useful for debugging and * other purposes, with no newline characters. */ - public static String shortDebugString(final Message message) { + public static String shortDebugString(final MessageOrBuilder message) { try { final StringBuilder sb = new StringBuilder(); SINGLE_LINE_PRINTER.print(message, new TextGenerator(sb)); @@ -109,7 +112,7 @@ public final class TextFormat { * Like {@code print()}, but writes directly to a {@code String} and * returns it. */ - public static String printToString(final Message message) { + public static String printToString(final MessageOrBuilder message) { try { final StringBuilder text = new StringBuilder(); print(message, text); @@ -133,6 +136,34 @@ public final class TextFormat { } } + /** + * Same as {@code printToString()}, except that non-ASCII characters + * in string type fields are not escaped in backslash+octals. + */ + public static String printToUnicodeString(final MessageOrBuilder message) { + try { + final StringBuilder text = new StringBuilder(); + UNICODE_PRINTER.print(message, new TextGenerator(text)); + return text.toString(); + } catch (IOException e) { + throw new IllegalStateException(e); + } + } + + /** + * Same as {@code printToString()}, except that non-ASCII characters + * in string type fields are not escaped in backslash+octals. + */ + public static String printToUnicodeString(final UnknownFieldSet fields) { + try { + final StringBuilder text = new StringBuilder(); + UNICODE_PRINTER.printUnknownFields(fields, new TextGenerator(text)); + return text.toString(); + } catch (IOException e) { + throw new IllegalStateException(e); + } + } + public static void printField(final FieldDescriptor field, final Object value, final Appendable output) @@ -216,13 +247,26 @@ public final class TextFormat { /** Helper class for converting protobufs to text. */ private static final class Printer { /** Whether to omit newlines from the output. */ - final boolean singleLineMode; + boolean singleLineMode = false; + + /** Whether to escape non ASCII characters with backslash and octal. */ + boolean escapeNonAscii = true; + + private Printer() {} - private Printer(final boolean singleLineMode) { + /** Setter of singleLineMode */ + private Printer setSingleLineMode(boolean singleLineMode) { this.singleLineMode = singleLineMode; + return this; + } + + /** Setter of escapeNonAscii */ + private Printer setEscapeNonAscii(boolean escapeNonAscii) { + this.escapeNonAscii = escapeNonAscii; + return this; } - private void print(final Message message, final TextGenerator generator) + private void print(final MessageOrBuilder message, final TextGenerator generator) throws IOException { for (Map.Entry<FieldDescriptor, Object> field : message.getAllFields().entrySet()) { @@ -339,7 +383,9 @@ public final class TextFormat { case STRING: generator.print("\""); - generator.print(escapeText((String) value)); + generator.print(escapeNonAscii ? + escapeText((String) value) : + (String) value); generator.print("\""); break; @@ -541,7 +587,7 @@ public final class TextFormat { private int previousLine = 0; private int previousColumn = 0; - // We use possesive quantifiers (*+ and ++) because otherwise the Java + // We use possessive quantifiers (*+ and ++) because otherwise the Java // regex matcher has stack overflows on large inputs. private static final Pattern WHITESPACE = Pattern.compile("(\\s|(#.*$))++", Pattern.MULTILINE); @@ -864,7 +910,7 @@ public final class TextFormat { public ParseException parseException(final String description) { // Note: People generally prefer one-based line and column numbers. return new ParseException( - (line + 1) + ":" + (column + 1) + ": " + description); + line + 1, column + 1, description); } /** @@ -875,7 +921,7 @@ public final class TextFormat { final String description) { // Note: People generally prefer one-based line and column numbers. return new ParseException( - (previousLine + 1) + ":" + (previousColumn + 1) + ": " + description); + previousLine + 1, previousColumn + 1, description); } /** @@ -900,8 +946,45 @@ public final class TextFormat { public static class ParseException extends IOException { private static final long serialVersionUID = 3196188060225107702L; + private final int line; + private final int column; + + /** Create a new instance, with -1 as the line and column numbers. */ public ParseException(final String message) { - super(message); + this(-1, -1, message); + } + + /** + * Create a new instance + * + * @param line the line number where the parse error occurred, + * using 1-offset. + * @param column the column number where the parser error occurred, + * using 1-offset. + */ + public ParseException(final int line, final int column, + final String message) { + super(Integer.toString(line) + ":" + column + ": " + message); + this.line = line; + this.column = column; + } + + /** + * Return the line where the parse exception occurred, or -1 when + * none is provided. The value is specified as 1-offset, so the first + * line is line 1. + */ + public int getLine() { + return line; + } + + /** + * Return the column where the parse exception occurred, or -1 when + * none is provided. The value is specified as 1-offset, so the first + * line is line 1. + */ + public int getColumn() { + return column; } } @@ -1073,7 +1156,7 @@ public final class TextFormat { mergeField(tokenizer, extensionRegistry, subBuilder); } - value = subBuilder.build(); + value = subBuilder.buildPartial(); } else { tokenizer.consume(":"); @@ -1212,7 +1295,7 @@ public final class TextFormat { */ static ByteString unescapeBytes(final CharSequence charString) throws InvalidEscapeSequenceException { - // First convert the Java characater sequence to UTF-8 bytes. + // First convert the Java character sequence to UTF-8 bytes. ByteString input = ByteString.copyFromUtf8(charString.toString()); // Then unescape certain byte sequences introduced by ASCII '\\'. The valid // escapes can all be expressed with ASCII characters, so it is safe to @@ -1349,7 +1432,7 @@ public final class TextFormat { /** * Parse a 32-bit signed integer from the text. Unlike the Java standard * {@code Integer.parseInt()}, this function recognizes the prefixes "0x" - * and "0" to signify hexidecimal and octal numbers, respectively. + * and "0" to signify hexadecimal and octal numbers, respectively. */ static int parseInt32(final String text) throws NumberFormatException { return (int) parseInteger(text, true, false); @@ -1358,7 +1441,7 @@ public final class TextFormat { /** * Parse a 32-bit unsigned integer from the text. Unlike the Java standard * {@code Integer.parseInt()}, this function recognizes the prefixes "0x" - * and "0" to signify hexidecimal and octal numbers, respectively. The + * and "0" to signify hexadecimal and octal numbers, respectively. The * result is coerced to a (signed) {@code int} when returned since Java has * no unsigned integer type. */ @@ -1369,7 +1452,7 @@ public final class TextFormat { /** * Parse a 64-bit signed integer from the text. Unlike the Java standard * {@code Integer.parseInt()}, this function recognizes the prefixes "0x" - * and "0" to signify hexidecimal and octal numbers, respectively. + * and "0" to signify hexadecimal and octal numbers, respectively. */ static long parseInt64(final String text) throws NumberFormatException { return parseInteger(text, true, true); @@ -1378,7 +1461,7 @@ public final class TextFormat { /** * Parse a 64-bit unsigned integer from the text. Unlike the Java standard * {@code Integer.parseInt()}, this function recognizes the prefixes "0x" - * and "0" to signify hexidecimal and octal numbers, respectively. The + * and "0" to signify hexadecimal and octal numbers, respectively. The * result is coerced to a (signed) {@code long} when returned since Java has * no unsigned long type. */ diff --git a/java/src/main/java/com/google/protobuf/UnknownFieldSet.java b/java/src/main/java/com/google/protobuf/UnknownFieldSet.java index 26a15d00..45e2e6e4 100644 --- a/java/src/main/java/com/google/protobuf/UnknownFieldSet.java +++ b/java/src/main/java/com/google/protobuf/UnknownFieldSet.java @@ -46,7 +46,7 @@ import java.util.TreeMap; * {@code UnknownFieldSet} is used to keep track of fields which were seen when * parsing a protocol message but whose field numbers or types are unrecognized. * This most frequently occurs when new fields are added to a message type - * and then messages containing those feilds are read by old software that was + * and then messages containing those fields are read by old software that was * compiled before the new types were added. * * <p>Every {@link Message} contains an {@code UnknownFieldSet} (and every @@ -468,7 +468,7 @@ public final class UnknownFieldSet implements MessageLite { /** * Parse a single field from {@code input} and merge it into this set. * @param tag The field's tag number, which was already parsed. - * @return {@code false} if the tag is an engroup tag. + * @return {@code false} if the tag is an end group tag. */ public boolean mergeFieldFrom(final int tag, final CodedInputStream input) throws IOException { @@ -950,4 +950,29 @@ public final class UnknownFieldSet implements MessageLite { } } } + + /** + * Parser to implement MessageLite interface. + */ + public static final class Parser extends AbstractParser<UnknownFieldSet> { + public UnknownFieldSet parsePartialFrom( + CodedInputStream input, ExtensionRegistryLite extensionRegistry) + throws InvalidProtocolBufferException { + Builder builder = newBuilder(); + try { + builder.mergeFrom(input); + } catch (InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(builder.buildPartial()); + } catch (IOException e) { + throw new InvalidProtocolBufferException(e.getMessage()) + .setUnfinishedMessage(builder.buildPartial()); + } + return builder.buildPartial(); + } + } + + private static final Parser PARSER = new Parser(); + public final Parser getParserForType() { + return PARSER; + } } diff --git a/java/src/main/java/com/google/protobuf/UnmodifiableLazyStringList.java b/java/src/main/java/com/google/protobuf/UnmodifiableLazyStringList.java index 83e5c796..f80f0968 100644 --- a/java/src/main/java/com/google/protobuf/UnmodifiableLazyStringList.java +++ b/java/src/main/java/com/google/protobuf/UnmodifiableLazyStringList.java @@ -32,6 +32,7 @@ package com.google.protobuf; import java.util.AbstractList; import java.util.RandomAccess; +import java.util.List; import java.util.ListIterator; import java.util.Iterator; @@ -143,4 +144,10 @@ public class UnmodifiableLazyStringList extends AbstractList<String> } }; } + + @Override + public List<?> getUnderlyingElements() { + // The returned value is already unmodifiable. + return list.getUnderlyingElements(); + } } diff --git a/java/src/main/java/com/google/protobuf/Utf8.java b/java/src/main/java/com/google/protobuf/Utf8.java new file mode 100644 index 00000000..388f7fc5 --- /dev/null +++ b/java/src/main/java/com/google/protobuf/Utf8.java @@ -0,0 +1,349 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// http://code.google.com/p/protobuf/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package com.google.protobuf; + +/** + * A set of low-level, high-performance static utility methods related + * to the UTF-8 character encoding. This class has no dependencies + * outside of the core JDK libraries. + * + * <p>There are several variants of UTF-8. The one implemented by + * this class is the restricted definition of UTF-8 introduced in + * Unicode 3.1, which mandates the rejection of "overlong" byte + * sequences as well as rejection of 3-byte surrogate codepoint byte + * sequences. Note that the UTF-8 decoder included in Oracle's JDK + * has been modified to also reject "overlong" byte sequences, but (as + * of 2011) still accepts 3-byte surrogate codepoint byte sequences. + * + * <p>The byte sequences considered valid by this class are exactly + * those that can be roundtrip converted to Strings and back to bytes + * using the UTF-8 charset, without loss: <pre> {@code + * Arrays.equals(bytes, new String(bytes, "UTF-8").getBytes("UTF-8")) + * }</pre> + * + * <p>See the Unicode Standard,</br> + * Table 3-6. <em>UTF-8 Bit Distribution</em>,</br> + * Table 3-7. <em>Well Formed UTF-8 Byte Sequences</em>. + * + * <p>This class supports decoding of partial byte sequences, so that the + * bytes in a complete UTF-8 byte sequences can be stored in multiple + * segments. Methods typically return {@link #MALFORMED} if the partial + * byte sequence is definitely not well-formed, {@link #COMPLETE} if it is + * well-formed in the absence of additional input, or if the byte sequence + * apparently terminated in the middle of a character, an opaque integer + * "state" value containing enough information to decode the character when + * passed to a subsequent invocation of a partial decoding method. + * + * @author martinrb@google.com (Martin Buchholz) + */ +final class Utf8 { + private Utf8() {} + + /** + * State value indicating that the byte sequence is well-formed and + * complete (no further bytes are needed to complete a character). + */ + public static final int COMPLETE = 0; + + /** + * State value indicating that the byte sequence is definitely not + * well-formed. + */ + public static final int MALFORMED = -1; + + // Other state values include the partial bytes of the incomplete + // character to be decoded in the simplest way: we pack the bytes + // into the state int in little-endian order. For example: + // + // int state = byte1 ^ (byte2 << 8) ^ (byte3 << 16); + // + // Such a state is unpacked thus (note the ~ operation for byte2 to + // undo byte1's sign-extension bits): + // + // int byte1 = (byte) state; + // int byte2 = (byte) ~(state >> 8); + // int byte3 = (byte) (state >> 16); + // + // We cannot store a zero byte in the state because it would be + // indistinguishable from the absence of a byte. But we don't need + // to, because partial bytes must always be negative. When building + // a state, we ensure that byte1 is negative and subsequent bytes + // are valid trailing bytes. + + /** + * Returns {@code true} if the given byte array is a well-formed + * UTF-8 byte sequence. + * + * <p>This is a convenience method, equivalent to a call to {@code + * isValidUtf8(bytes, 0, bytes.length)}. + */ + public static boolean isValidUtf8(byte[] bytes) { + return isValidUtf8(bytes, 0, bytes.length); + } + + /** + * Returns {@code true} if the given byte array slice is a + * well-formed UTF-8 byte sequence. The range of bytes to be + * checked extends from index {@code index}, inclusive, to {@code + * limit}, exclusive. + * + * <p>This is a convenience method, equivalent to {@code + * partialIsValidUtf8(bytes, index, limit) == Utf8.COMPLETE}. + */ + public static boolean isValidUtf8(byte[] bytes, int index, int limit) { + return partialIsValidUtf8(bytes, index, limit) == COMPLETE; + } + + /** + * Tells whether the given byte array slice is a well-formed, + * malformed, or incomplete UTF-8 byte sequence. The range of bytes + * to be checked extends from index {@code index}, inclusive, to + * {@code limit}, exclusive. + * + * @param state either {@link Utf8#COMPLETE} (if this is the initial decoding + * operation) or the value returned from a call to a partial decoding method + * for the previous bytes + * + * @return {@link #MALFORMED} if the partial byte sequence is + * definitely not well-formed, {@link #COMPLETE} if it is well-formed + * (no additional input needed), or if the byte sequence is + * "incomplete", i.e. apparently terminated in the middle of a character, + * an opaque integer "state" value containing enough information to + * decode the character when passed to a subsequent invocation of a + * partial decoding method. + */ + public static int partialIsValidUtf8( + int state, byte[] bytes, int index, int limit) { + if (state != COMPLETE) { + // The previous decoding operation was incomplete (or malformed). + // We look for a well-formed sequence consisting of bytes from + // the previous decoding operation (stored in state) together + // with bytes from the array slice. + // + // We expect such "straddler characters" to be rare. + + if (index >= limit) { // No bytes? No progress. + return state; + } + int byte1 = (byte) state; + // byte1 is never ASCII. + if (byte1 < (byte) 0xE0) { + // two-byte form + + // Simultaneously checks for illegal trailing-byte in + // leading position and overlong 2-byte form. + if (byte1 < (byte) 0xC2 || + // byte2 trailing-byte test + bytes[index++] > (byte) 0xBF) { + return MALFORMED; + } + } else if (byte1 < (byte) 0xF0) { + // three-byte form + + // Get byte2 from saved state or array + int byte2 = (byte) ~(state >> 8); + if (byte2 == 0) { + byte2 = bytes[index++]; + if (index >= limit) { + return incompleteStateFor(byte1, byte2); + } + } + if (byte2 > (byte) 0xBF || + // overlong? 5 most significant bits must not all be zero + (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0) || + // illegal surrogate codepoint? + (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0) || + // byte3 trailing-byte test + bytes[index++] > (byte) 0xBF) { + return MALFORMED; + } + } else { + // four-byte form + + // Get byte2 and byte3 from saved state or array + int byte2 = (byte) ~(state >> 8); + int byte3 = 0; + if (byte2 == 0) { + byte2 = bytes[index++]; + if (index >= limit) { + return incompleteStateFor(byte1, byte2); + } + } else { + byte3 = (byte) (state >> 16); + } + if (byte3 == 0) { + byte3 = bytes[index++]; + if (index >= limit) { + return incompleteStateFor(byte1, byte2, byte3); + } + } + + // If we were called with state == MALFORMED, then byte1 is 0xFF, + // which never occurs in well-formed UTF-8, and so we will return + // MALFORMED again below. + + if (byte2 > (byte) 0xBF || + // Check that 1 <= plane <= 16. Tricky optimized form of: + // if (byte1 > (byte) 0xF4 || + // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 || + // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F) + (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0 || + // byte3 trailing-byte test + byte3 > (byte) 0xBF || + // byte4 trailing-byte test + bytes[index++] > (byte) 0xBF) { + return MALFORMED; + } + } + } + + return partialIsValidUtf8(bytes, index, limit); + } + + /** + * Tells whether the given byte array slice is a well-formed, + * malformed, or incomplete UTF-8 byte sequence. The range of bytes + * to be checked extends from index {@code index}, inclusive, to + * {@code limit}, exclusive. + * + * <p>This is a convenience method, equivalent to a call to {@code + * partialIsValidUtf8(Utf8.COMPLETE, bytes, index, limit)}. + * + * @return {@link #MALFORMED} if the partial byte sequence is + * definitely not well-formed, {@link #COMPLETE} if it is well-formed + * (no additional input needed), or if the byte sequence is + * "incomplete", i.e. apparently terminated in the middle of a character, + * an opaque integer "state" value containing enough information to + * decode the character when passed to a subsequent invocation of a + * partial decoding method. + */ + public static int partialIsValidUtf8( + byte[] bytes, int index, int limit) { + // Optimize for 100% ASCII. + // Hotspot loves small simple top-level loops like this. + while (index < limit && bytes[index] >= 0) { + index++; + } + + return (index >= limit) ? COMPLETE : + partialIsValidUtf8NonAscii(bytes, index, limit); + } + + private static int partialIsValidUtf8NonAscii( + byte[] bytes, int index, int limit) { + for (;;) { + int byte1, byte2; + + // Optimize for interior runs of ASCII bytes. + do { + if (index >= limit) { + return COMPLETE; + } + } while ((byte1 = bytes[index++]) >= 0); + + if (byte1 < (byte) 0xE0) { + // two-byte form + + if (index >= limit) { + return byte1; + } + + // Simultaneously checks for illegal trailing-byte in + // leading position and overlong 2-byte form. + if (byte1 < (byte) 0xC2 || + bytes[index++] > (byte) 0xBF) { + return MALFORMED; + } + } else if (byte1 < (byte) 0xF0) { + // three-byte form + + if (index >= limit - 1) { // incomplete sequence + return incompleteStateFor(bytes, index, limit); + } + if ((byte2 = bytes[index++]) > (byte) 0xBF || + // overlong? 5 most significant bits must not all be zero + (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0) || + // check for illegal surrogate codepoints + (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0) || + // byte3 trailing-byte test + bytes[index++] > (byte) 0xBF) { + return MALFORMED; + } + } else { + // four-byte form + + if (index >= limit - 2) { // incomplete sequence + return incompleteStateFor(bytes, index, limit); + } + if ((byte2 = bytes[index++]) > (byte) 0xBF || + // Check that 1 <= plane <= 16. Tricky optimized form of: + // if (byte1 > (byte) 0xF4 || + // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 || + // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F) + (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0 || + // byte3 trailing-byte test + bytes[index++] > (byte) 0xBF || + // byte4 trailing-byte test + bytes[index++] > (byte) 0xBF) { + return MALFORMED; + } + } + } + } + + private static int incompleteStateFor(int byte1) { + return (byte1 > (byte) 0xF4) ? + MALFORMED : byte1; + } + + private static int incompleteStateFor(int byte1, int byte2) { + return (byte1 > (byte) 0xF4 || + byte2 > (byte) 0xBF) ? + MALFORMED : byte1 ^ (byte2 << 8); + } + + private static int incompleteStateFor(int byte1, int byte2, int byte3) { + return (byte1 > (byte) 0xF4 || + byte2 > (byte) 0xBF || + byte3 > (byte) 0xBF) ? + MALFORMED : byte1 ^ (byte2 << 8) ^ (byte3 << 16); + } + + private static int incompleteStateFor(byte[] bytes, int index, int limit) { + int byte1 = bytes[index - 1]; + switch (limit - index) { + case 0: return incompleteStateFor(byte1); + case 1: return incompleteStateFor(byte1, bytes[index]); + case 2: return incompleteStateFor(byte1, bytes[index], bytes[index + 1]); + default: throw new AssertionError(); + } + } +} diff --git a/java/src/main/java/com/google/protobuf/WireFormat.java b/java/src/main/java/com/google/protobuf/WireFormat.java index a30f2a3c..dd2d6310 100644 --- a/java/src/main/java/com/google/protobuf/WireFormat.java +++ b/java/src/main/java/com/google/protobuf/WireFormat.java @@ -146,7 +146,7 @@ public final class WireFormat { public boolean isPackable() { return true; } } - // Field numbers for feilds in MessageSet wire format. + // Field numbers for fields in MessageSet wire format. static final int MESSAGE_SET_ITEM = 1; static final int MESSAGE_SET_TYPE_ID = 2; static final int MESSAGE_SET_MESSAGE = 3; |