aboutsummaryrefslogtreecommitdiff
path: root/java/src/main
diff options
context:
space:
mode:
authorxiaofeng@google.com <xiaofeng@google.com@630680e5-0e50-0410-840e-4b1c322b438d>2012-09-22 02:40:50 +0000
committerxiaofeng@google.com <xiaofeng@google.com@630680e5-0e50-0410-840e-4b1c322b438d>2012-09-22 02:40:50 +0000
commitb55a20fa2c669b181f47ea9219b8e74d1263da19 (patch)
tree3936a0e7c22196587a6d8397372de41434fe2129 /java/src/main
parent9ced30caf94bb4e7e9629c199679ff44e8ca7389 (diff)
downloadprotobuf-b55a20fa2c669b181f47ea9219b8e74d1263da19.tar.gz
protobuf-b55a20fa2c669b181f47ea9219b8e74d1263da19.tar.bz2
protobuf-b55a20fa2c669b181f47ea9219b8e74d1263da19.zip
Down-integrate from internal branch
Diffstat (limited to 'java/src/main')
-rw-r--r--java/src/main/java/com/google/protobuf/AbstractMessage.java336
-rw-r--r--java/src/main/java/com/google/protobuf/AbstractMessageLite.java26
-rw-r--r--java/src/main/java/com/google/protobuf/AbstractParser.java261
-rw-r--r--java/src/main/java/com/google/protobuf/BoundedByteString.java163
-rw-r--r--java/src/main/java/com/google/protobuf/ByteString.java877
-rw-r--r--java/src/main/java/com/google/protobuf/CodedInputStream.java41
-rw-r--r--java/src/main/java/com/google/protobuf/CodedOutputStream.java34
-rw-r--r--java/src/main/java/com/google/protobuf/Descriptors.java157
-rw-r--r--java/src/main/java/com/google/protobuf/DynamicMessage.java68
-rw-r--r--java/src/main/java/com/google/protobuf/ExtensionRegistryLite.java18
-rw-r--r--java/src/main/java/com/google/protobuf/FieldSet.java137
-rw-r--r--java/src/main/java/com/google/protobuf/GeneratedMessage.java179
-rw-r--r--java/src/main/java/com/google/protobuf/GeneratedMessageLite.java280
-rw-r--r--java/src/main/java/com/google/protobuf/Internal.java95
-rw-r--r--java/src/main/java/com/google/protobuf/InvalidProtocolBufferException.java21
-rw-r--r--java/src/main/java/com/google/protobuf/LazyField.java216
-rw-r--r--java/src/main/java/com/google/protobuf/LazyStringArrayList.java30
-rw-r--r--java/src/main/java/com/google/protobuf/LazyStringList.java17
-rw-r--r--java/src/main/java/com/google/protobuf/LiteralByteString.java349
-rw-r--r--java/src/main/java/com/google/protobuf/Message.java30
-rw-r--r--java/src/main/java/com/google/protobuf/MessageLite.java40
-rw-r--r--java/src/main/java/com/google/protobuf/MessageLiteOrBuilder.java2
-rw-r--r--java/src/main/java/com/google/protobuf/MessageOrBuilder.java23
-rw-r--r--java/src/main/java/com/google/protobuf/Parser.java259
-rw-r--r--java/src/main/java/com/google/protobuf/RepeatedFieldBuilder.java14
-rw-r--r--java/src/main/java/com/google/protobuf/RopeByteString.java945
-rw-r--r--java/src/main/java/com/google/protobuf/SingleFieldBuilder.java12
-rw-r--r--java/src/main/java/com/google/protobuf/SmallSortedMap.java10
-rw-r--r--java/src/main/java/com/google/protobuf/TextFormat.java121
-rw-r--r--java/src/main/java/com/google/protobuf/UnknownFieldSet.java29
-rw-r--r--java/src/main/java/com/google/protobuf/UnmodifiableLazyStringList.java7
-rw-r--r--java/src/main/java/com/google/protobuf/Utf8.java349
-rw-r--r--java/src/main/java/com/google/protobuf/WireFormat.java2
33 files changed, 4519 insertions, 629 deletions
diff --git a/java/src/main/java/com/google/protobuf/AbstractMessage.java b/java/src/main/java/com/google/protobuf/AbstractMessage.java
index b9d83016..f4d115de 100644
--- a/java/src/main/java/com/google/protobuf/AbstractMessage.java
+++ b/java/src/main/java/com/google/protobuf/AbstractMessage.java
@@ -32,6 +32,7 @@ package com.google.protobuf;
import com.google.protobuf.Descriptors.Descriptor;
import com.google.protobuf.Descriptors.FieldDescriptor;
+import com.google.protobuf.GeneratedMessage.ExtendableBuilder;
import com.google.protobuf.Internal.EnumLite;
import java.io.IOException;
@@ -81,6 +82,25 @@ public abstract class AbstractMessage extends AbstractMessageLite
return true;
}
+ public List<String> findInitializationErrors() {
+ return Builder.findMissingFields(this);
+ }
+
+ public String getInitializationErrorString() {
+ return delimitWithCommas(findInitializationErrors());
+ }
+
+ private static String delimitWithCommas(List<String> parts) {
+ StringBuilder result = new StringBuilder();
+ for (String part : parts) {
+ if (result.length() > 0) {
+ result.append(", ");
+ }
+ result.append(part);
+ }
+ return result.toString();
+ }
+
@Override
public final String toString() {
return TextFormat.printToString(this);
@@ -209,6 +229,15 @@ public abstract class AbstractMessage extends AbstractMessageLite
}
/**
+ * Package private helper method for AbstractParser to create
+ * UninitializedMessageException with missing field information.
+ */
+ @Override
+ UninitializedMessageException newUninitializedMessageException() {
+ return Builder.newUninitializedMessageException(this);
+ }
+
+ /**
* Helper method for implementing {@link Message#hashCode()}.
* <p>
* This is needed because {@link java.lang.Enum#hashCode()} is final, but we
@@ -251,6 +280,14 @@ public abstract class AbstractMessage extends AbstractMessageLite
return (BuilderType) this;
}
+ public List<String> findInitializationErrors() {
+ return findMissingFields(this);
+ }
+
+ public String getInitializationErrorString() {
+ return delimitWithCommas(findInitializationErrors());
+ }
+
public BuilderType mergeFrom(final Message other) {
if (other.getDescriptorForType() != getDescriptorForType()) {
throw new IllegalArgumentException(
@@ -314,7 +351,7 @@ public abstract class AbstractMessage extends AbstractMessageLite
}
if (!mergeFieldFrom(input, unknownFields, extensionRegistry,
- this, tag)) {
+ getDescriptorForType(), this, null, tag)) {
// end group tag
break;
}
@@ -323,25 +360,93 @@ public abstract class AbstractMessage extends AbstractMessageLite
return (BuilderType) this;
}
+ /** helper method to handle {@code builder} and {@code extensions}. */
+ private static void addRepeatedField(
+ Message.Builder builder,
+ FieldSet<FieldDescriptor> extensions,
+ FieldDescriptor field,
+ Object value) {
+ if (builder != null) {
+ builder.addRepeatedField(field, value);
+ } else {
+ extensions.addRepeatedField(field, value);
+ }
+ }
+
+ /** helper method to handle {@code builder} and {@code extensions}. */
+ private static void setField(
+ Message.Builder builder,
+ FieldSet<FieldDescriptor> extensions,
+ FieldDescriptor field,
+ Object value) {
+ if (builder != null) {
+ builder.setField(field, value);
+ } else {
+ extensions.setField(field, value);
+ }
+ }
+
+ /** helper method to handle {@code builder} and {@code extensions}. */
+ private static boolean hasOriginalMessage(
+ Message.Builder builder,
+ FieldSet<FieldDescriptor> extensions,
+ FieldDescriptor field) {
+ if (builder != null) {
+ return builder.hasField(field);
+ } else {
+ return extensions.hasField(field);
+ }
+ }
+
+ /** helper method to handle {@code builder} and {@code extensions}. */
+ private static Message getOriginalMessage(
+ Message.Builder builder,
+ FieldSet<FieldDescriptor> extensions,
+ FieldDescriptor field) {
+ if (builder != null) {
+ return (Message) builder.getField(field);
+ } else {
+ return (Message) extensions.getField(field);
+ }
+ }
+
+ /** helper method to handle {@code builder} and {@code extensions}. */
+ private static void mergeOriginalMessage(
+ Message.Builder builder,
+ FieldSet<FieldDescriptor> extensions,
+ FieldDescriptor field,
+ Message.Builder subBuilder) {
+ Message originalMessage = getOriginalMessage(builder, extensions, field);
+ if (originalMessage != null) {
+ subBuilder.mergeFrom(originalMessage);
+ }
+ }
+
/**
- * Like {@link #mergeFrom(CodedInputStream, UnknownFieldSet.Builder,
- * ExtensionRegistryLite, Message.Builder)}, but parses a single field.
+ * Like {@link #mergeFrom(CodedInputStream, ExtensionRegistryLite)}, but
+ * parses a single field.
+ *
+ * When {@code builder} is not null, the method will parse and merge the
+ * field into {@code builder}. Otherwise, it will try to parse the field
+ * into {@code extensions}, when it's called by the parsing constructor in
+ * generated classes.
+ *
* Package-private because it is used by GeneratedMessage.ExtendableMessage.
* @param tag The tag, which should have already been read.
* @return {@code true} unless the tag is an end-group tag.
*/
static boolean mergeFieldFrom(
- final CodedInputStream input,
- final UnknownFieldSet.Builder unknownFields,
- final ExtensionRegistryLite extensionRegistry,
- final Message.Builder builder,
- final int tag) throws IOException {
- final Descriptor type = builder.getDescriptorForType();
-
+ CodedInputStream input,
+ UnknownFieldSet.Builder unknownFields,
+ ExtensionRegistryLite extensionRegistry,
+ Descriptor type,
+ Message.Builder builder,
+ FieldSet<FieldDescriptor> extensions,
+ int tag) throws IOException {
if (type.getOptions().getMessageSetWireFormat() &&
tag == WireFormat.MESSAGE_SET_ITEM_TAG) {
mergeMessageSetExtensionFromCodedStream(
- input, unknownFields, extensionRegistry, builder);
+ input, unknownFields, extensionRegistry, type, builder, extensions);
return true;
}
@@ -376,8 +481,10 @@ public abstract class AbstractMessage extends AbstractMessageLite
} else {
field = null;
}
- } else {
+ } else if (builder != null) {
field = type.findFieldByNumber(fieldNumber);
+ } else {
+ field = null;
}
boolean unknown = false;
@@ -413,13 +520,13 @@ public abstract class AbstractMessage extends AbstractMessageLite
// enum, drop it (don't even add it to unknownFields).
return true;
}
- builder.addRepeatedField(field, value);
+ addRepeatedField(builder, extensions, field, value);
}
} else {
while (input.getBytesUntilLimit() > 0) {
final Object value =
FieldSet.readPrimitiveField(input, field.getLiteType());
- builder.addRepeatedField(field, value);
+ addRepeatedField(builder, extensions, field, value);
}
}
input.popLimit(limit);
@@ -434,10 +541,10 @@ public abstract class AbstractMessage extends AbstractMessageLite
subBuilder = builder.newBuilderForField(field);
}
if (!field.isRepeated()) {
- subBuilder.mergeFrom((Message) builder.getField(field));
+ mergeOriginalMessage(builder, extensions, field, subBuilder);
}
input.readGroup(field.getNumber(), subBuilder, extensionRegistry);
- value = subBuilder.build();
+ value = subBuilder.buildPartial();
break;
}
case MESSAGE: {
@@ -448,10 +555,10 @@ public abstract class AbstractMessage extends AbstractMessageLite
subBuilder = builder.newBuilderForField(field);
}
if (!field.isRepeated()) {
- subBuilder.mergeFrom((Message) builder.getField(field));
+ mergeOriginalMessage(builder, extensions, field, subBuilder);
}
input.readMessage(subBuilder, extensionRegistry);
- value = subBuilder.build();
+ value = subBuilder.buildPartial();
break;
}
case ENUM:
@@ -470,22 +577,28 @@ public abstract class AbstractMessage extends AbstractMessageLite
}
if (field.isRepeated()) {
- builder.addRepeatedField(field, value);
+ addRepeatedField(builder, extensions, field, value);
} else {
- builder.setField(field, value);
+ setField(builder, extensions, field, value);
}
}
return true;
}
- /** Called by {@code #mergeFieldFrom()} to parse a MessageSet extension. */
+ /**
+ * Called by {@code #mergeFieldFrom()} to parse a MessageSet extension.
+ * If {@code builder} is not null, this method will merge MessageSet into
+ * the builder. Otherwise, it will merge the MessageSet into {@code
+ * extensions}.
+ */
private static void mergeMessageSetExtensionFromCodedStream(
- final CodedInputStream input,
- final UnknownFieldSet.Builder unknownFields,
- final ExtensionRegistryLite extensionRegistry,
- final Message.Builder builder) throws IOException {
- final Descriptor type = builder.getDescriptorForType();
+ CodedInputStream input,
+ UnknownFieldSet.Builder unknownFields,
+ ExtensionRegistryLite extensionRegistry,
+ Descriptor type,
+ Message.Builder builder,
+ FieldSet<FieldDescriptor> extensions) throws IOException {
// The wire format for MessageSet is:
// message MessageSet {
@@ -504,10 +617,11 @@ public abstract class AbstractMessage extends AbstractMessageLite
// should be prepared to accept them.
int typeId = 0;
- ByteString rawBytes = null; // If we encounter "message" before "typeId"
- Message.Builder subBuilder = null;
- FieldDescriptor field = null;
+ ByteString rawBytes = null; // If we encounter "message" before "typeId"
+ ExtensionRegistry.ExtensionInfo extension = null;
+ // Read bytes from input. If we get its type first then parse it eagerly,
+ // otherwise we store the raw bytes in a local variable.
while (true) {
final int tag = input.readTag();
if (tag == 0) {
@@ -516,75 +630,121 @@ public abstract class AbstractMessage extends AbstractMessageLite
if (tag == WireFormat.MESSAGE_SET_TYPE_ID_TAG) {
typeId = input.readUInt32();
- // Zero is not a valid type ID.
if (typeId != 0) {
- final ExtensionRegistry.ExtensionInfo extension;
-
// extensionRegistry may be either ExtensionRegistry or
- // ExtensionRegistryLite. Since the type we are parsing is a full
+ // ExtensionRegistryLite. Since the type we are parsing is a full
// message, only a full ExtensionRegistry could possibly contain
- // extensions of it. Otherwise we will treat the registry as if it
+ // extensions of it. Otherwise we will treat the registry as if it
// were empty.
if (extensionRegistry instanceof ExtensionRegistry) {
extension = ((ExtensionRegistry) extensionRegistry)
.findExtensionByNumber(type, typeId);
- } else {
- extension = null;
- }
-
- if (extension != null) {
- field = extension.descriptor;
- subBuilder = extension.defaultInstance.newBuilderForType();
- final Message originalMessage = (Message)builder.getField(field);
- if (originalMessage != null) {
- subBuilder.mergeFrom(originalMessage);
- }
- if (rawBytes != null) {
- // We already encountered the message. Parse it now.
- subBuilder.mergeFrom(
- CodedInputStream.newInstance(rawBytes.newInput()));
- rawBytes = null;
- }
- } else {
- // Unknown extension number. If we already saw data, put it
- // in rawBytes.
- if (rawBytes != null) {
- unknownFields.mergeField(typeId,
- UnknownFieldSet.Field.newBuilder()
- .addLengthDelimited(rawBytes)
- .build());
- rawBytes = null;
- }
}
}
+
} else if (tag == WireFormat.MESSAGE_SET_MESSAGE_TAG) {
- if (typeId == 0) {
- // We haven't seen a type ID yet, so we have to store the raw bytes
- // for now.
- rawBytes = input.readBytes();
- } else if (subBuilder == null) {
- // We don't know how to parse this. Ignore it.
- unknownFields.mergeField(typeId,
- UnknownFieldSet.Field.newBuilder()
- .addLengthDelimited(input.readBytes())
- .build());
- } else {
- // We already know the type, so we can parse directly from the input
- // with no copying. Hooray!
- input.readMessage(subBuilder, extensionRegistry);
+ if (typeId != 0) {
+ if (extension != null && ExtensionRegistryLite.isEagerlyParseMessageSets()) {
+ // We already know the type, so we can parse directly from the
+ // input with no copying. Hooray!
+ eagerlyMergeMessageSetExtension(
+ input, extension, extensionRegistry, builder, extensions);
+ rawBytes = null;
+ continue;
+ }
}
- } else {
- // Unknown tag. Skip it.
+ // We haven't seen a type ID yet or we want to parse the message lazily.
+ rawBytes = input.readBytes();
+
+ } else { // Unknown tag. Skip it.
if (!input.skipField(tag)) {
- break; // end of group
+ break; // End of group
}
}
}
-
input.checkLastTagWas(WireFormat.MESSAGE_SET_ITEM_END_TAG);
- if (subBuilder != null) {
- builder.setField(field, subBuilder.build());
+ // Process the raw bytes.
+ if (rawBytes != null && typeId != 0) { // Zero is not a valid type ID.
+ if (extension != null) { // We know the type
+ mergeMessageSetExtensionFromBytes(
+ rawBytes, extension, extensionRegistry, builder, extensions);
+ } else { // We don't know how to parse this. Ignore it.
+ if (rawBytes != null) {
+ unknownFields.mergeField(typeId, UnknownFieldSet.Field.newBuilder()
+ .addLengthDelimited(rawBytes).build());
+ }
+ }
+ }
+ }
+
+ private static void eagerlyMergeMessageSetExtension(
+ CodedInputStream input,
+ ExtensionRegistry.ExtensionInfo extension,
+ ExtensionRegistryLite extensionRegistry,
+ Message.Builder builder,
+ FieldSet<FieldDescriptor> extensions) throws IOException {
+
+ FieldDescriptor field = extension.descriptor;
+ Message value = null;
+ if (hasOriginalMessage(builder, extensions, field)) {
+ Message originalMessage =
+ getOriginalMessage(builder, extensions, field);
+ Message.Builder subBuilder = originalMessage.toBuilder();
+ input.readMessage(subBuilder, extensionRegistry);
+ value = subBuilder.buildPartial();
+ } else {
+ value = input.readMessage(extension.defaultInstance.getParserForType(),
+ extensionRegistry);
+ }
+
+ if (builder != null) {
+ builder.setField(field, value);
+ } else {
+ extensions.setField(field, value);
+ }
+ }
+
+ private static void mergeMessageSetExtensionFromBytes(
+ ByteString rawBytes,
+ ExtensionRegistry.ExtensionInfo extension,
+ ExtensionRegistryLite extensionRegistry,
+ Message.Builder builder,
+ FieldSet<FieldDescriptor> extensions) throws IOException {
+
+ FieldDescriptor field = extension.descriptor;
+ boolean hasOriginalValue = hasOriginalMessage(builder, extensions, field);
+
+ if (hasOriginalValue || ExtensionRegistryLite.isEagerlyParseMessageSets()) {
+ // If the field already exists, we just parse the field.
+ Message value = null;
+ if (hasOriginalValue) {
+ Message originalMessage =
+ getOriginalMessage(builder, extensions, field);
+ Message.Builder subBuilder= originalMessage.toBuilder();
+ subBuilder.mergeFrom(rawBytes, extensionRegistry);
+ value = subBuilder.buildPartial();
+ } else {
+ value = extension.defaultInstance.getParserForType()
+ .parsePartialFrom(rawBytes, extensionRegistry);
+ }
+ setField(builder, extensions, field, value);
+ } else {
+ // Use LazyField to load MessageSet lazily.
+ LazyField lazyField = new LazyField(
+ extension.defaultInstance, extensionRegistry, rawBytes);
+ if (builder != null) {
+ // TODO(xiangl): it looks like this method can only be invoked by
+ // ExtendableBuilder, but I'm not sure. So I double check the type of
+ // builder here. It may be useless and need more investigation.
+ if (builder instanceof ExtendableBuilder) {
+ builder.setField(field, lazyField);
+ } else {
+ builder.setField(field, lazyField.getValue());
+ }
+ } else {
+ extensions.setField(field, lazyField);
+ }
}
}
@@ -596,6 +756,11 @@ public abstract class AbstractMessage extends AbstractMessageLite
return (BuilderType) this;
}
+ public Message.Builder getFieldBuilder(final FieldDescriptor field) {
+ throw new UnsupportedOperationException(
+ "getFieldBuilder() called on an unsupported message type.");
+ }
+
/**
* Construct an UninitializedMessageException reporting missing fields in
* the given message.
@@ -609,14 +774,15 @@ public abstract class AbstractMessage extends AbstractMessageLite
* Populates {@code this.missingFields} with the full "path" of each
* missing required field in the given message.
*/
- private static List<String> findMissingFields(final Message message) {
+ private static List<String> findMissingFields(
+ final MessageOrBuilder message) {
final List<String> results = new ArrayList<String>();
findMissingFields(message, "", results);
return results;
}
/** Recursive helper implementing {@link #findMissingFields(Message)}. */
- private static void findMissingFields(final Message message,
+ private static void findMissingFields(final MessageOrBuilder message,
final String prefix,
final List<String> results) {
for (final FieldDescriptor field :
@@ -635,13 +801,13 @@ public abstract class AbstractMessage extends AbstractMessageLite
if (field.isRepeated()) {
int i = 0;
for (final Object element : (List) value) {
- findMissingFields((Message) element,
+ findMissingFields((MessageOrBuilder) element,
subMessagePrefix(prefix, field, i++),
results);
}
} else {
if (message.hasField(field)) {
- findMissingFields((Message) value,
+ findMissingFields((MessageOrBuilder) value,
subMessagePrefix(prefix, field, -1),
results);
}
diff --git a/java/src/main/java/com/google/protobuf/AbstractMessageLite.java b/java/src/main/java/com/google/protobuf/AbstractMessageLite.java
index 77b27370..9926f3db 100644
--- a/java/src/main/java/com/google/protobuf/AbstractMessageLite.java
+++ b/java/src/main/java/com/google/protobuf/AbstractMessageLite.java
@@ -92,6 +92,14 @@ public abstract class AbstractMessageLite implements MessageLite {
}
/**
+ * Package private helper method for AbstractParser to create
+ * UninitializedMessageException.
+ */
+ UninitializedMessageException newUninitializedMessageException() {
+ return new UninitializedMessageException(this);
+ }
+
+ /**
* A partial implementation of the {@link Message.Builder} interface which
* implements as many methods of that interface as possible in terms of
* other methods.
@@ -307,10 +315,12 @@ public abstract class AbstractMessageLite implements MessageLite {
*/
protected static <T> void addAll(final Iterable<T> values,
final Collection<? super T> list) {
- for (final T value : values) {
- if (value == null) {
- throw new NullPointerException();
- }
+ if (values instanceof LazyStringList) {
+ // For StringOrByteStringLists, check the underlying elements to avoid
+ // forcing conversions of ByteStrings to Strings.
+ checkForNullValues(((LazyStringList) values).getUnderlyingElements());
+ } else {
+ checkForNullValues(values);
}
if (values instanceof Collection) {
final Collection<T> collection = (Collection<T>) values;
@@ -321,5 +331,13 @@ public abstract class AbstractMessageLite implements MessageLite {
}
}
}
+
+ private static void checkForNullValues(final Iterable<?> values) {
+ for (final Object value : values) {
+ if (value == null) {
+ throw new NullPointerException();
+ }
+ }
+ }
}
}
diff --git a/java/src/main/java/com/google/protobuf/AbstractParser.java b/java/src/main/java/com/google/protobuf/AbstractParser.java
new file mode 100644
index 00000000..9bd9d397
--- /dev/null
+++ b/java/src/main/java/com/google/protobuf/AbstractParser.java
@@ -0,0 +1,261 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import com.google.protobuf.AbstractMessageLite.Builder.LimitedInputStream;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * A partial implementation of the {@link Parser} interface which implements
+ * as many methods of that interface as possible in terms of other methods.
+ *
+ * Note: This class implements all the convenience methods in the
+ * {@link Parser} interface. See {@link Parser} for related javadocs.
+ * Subclasses need to implement
+ * {@link Parser#parsePartialFrom(CodedInputStream, ExtensionRegistryLite)}
+ *
+ * @author liujisi@google.com (Pherl Liu)
+ */
+public abstract class AbstractParser<MessageType extends MessageLite>
+ implements Parser<MessageType> {
+ /**
+ * Creates an UninitializedMessageException for MessageType.
+ */
+ private UninitializedMessageException
+ newUninitializedMessageException(MessageType message) {
+ if (message instanceof AbstractMessageLite) {
+ return ((AbstractMessageLite) message).newUninitializedMessageException();
+ }
+ return new UninitializedMessageException(message);
+ }
+
+ /**
+ * Helper method to check if message is initialized.
+ *
+ * @throws InvalidProtocolBufferException if it is not initialized.
+ * @return The message that was checked.
+ */
+ private MessageType checkMessageInitialized(MessageType message)
+ throws InvalidProtocolBufferException {
+ if (message != null && !message.isInitialized()) {
+ throw newUninitializedMessageException(message)
+ .asInvalidProtocolBufferException()
+ .setUnfinishedMessage(message);
+ }
+ return message;
+ }
+
+ private static final ExtensionRegistryLite EMPTY_REGISTRY
+ = ExtensionRegistryLite.getEmptyRegistry();
+
+ public MessageType parsePartialFrom(CodedInputStream input)
+ throws InvalidProtocolBufferException {
+ return parsePartialFrom(input, EMPTY_REGISTRY);
+ }
+
+ public MessageType parseFrom(CodedInputStream input,
+ ExtensionRegistryLite extensionRegistry)
+ throws InvalidProtocolBufferException {
+ return checkMessageInitialized(
+ parsePartialFrom(input, extensionRegistry));
+ }
+
+ public MessageType parseFrom(CodedInputStream input)
+ throws InvalidProtocolBufferException {
+ return parseFrom(input, EMPTY_REGISTRY);
+ }
+
+ public MessageType parsePartialFrom(ByteString data,
+ ExtensionRegistryLite extensionRegistry)
+ throws InvalidProtocolBufferException {
+ MessageType message;
+ try {
+ CodedInputStream input = data.newCodedInput();
+ message = parsePartialFrom(input, extensionRegistry);
+ try {
+ input.checkLastTagWas(0);
+ } catch (InvalidProtocolBufferException e) {
+ throw e.setUnfinishedMessage(message);
+ }
+ return message;
+ } catch (InvalidProtocolBufferException e) {
+ throw e;
+ } catch (IOException e) {
+ throw new RuntimeException(
+ "Reading from a ByteString threw an IOException (should " +
+ "never happen).", e);
+ }
+ }
+
+ public MessageType parsePartialFrom(ByteString data)
+ throws InvalidProtocolBufferException {
+ return parsePartialFrom(data, EMPTY_REGISTRY);
+ }
+
+ public MessageType parseFrom(ByteString data,
+ ExtensionRegistryLite extensionRegistry)
+ throws InvalidProtocolBufferException {
+ return checkMessageInitialized(parsePartialFrom(data, extensionRegistry));
+ }
+
+ public MessageType parseFrom(ByteString data)
+ throws InvalidProtocolBufferException {
+ return parseFrom(data, EMPTY_REGISTRY);
+ }
+
+ public MessageType parsePartialFrom(byte[] data, int off, int len,
+ ExtensionRegistryLite extensionRegistry)
+ throws InvalidProtocolBufferException {
+ try {
+ CodedInputStream input = CodedInputStream.newInstance(data, off, len);
+ MessageType message = parsePartialFrom(input, extensionRegistry);
+ try {
+ input.checkLastTagWas(0);
+ } catch (InvalidProtocolBufferException e) {
+ throw e.setUnfinishedMessage(message);
+ }
+ return message;
+ } catch (InvalidProtocolBufferException e) {
+ throw e;
+ } catch (IOException e) {
+ throw new RuntimeException(
+ "Reading from a byte array threw an IOException (should " +
+ "never happen).", e);
+ }
+ }
+
+ public MessageType parsePartialFrom(byte[] data, int off, int len)
+ throws InvalidProtocolBufferException {
+ return parsePartialFrom(data, off, len, EMPTY_REGISTRY);
+ }
+
+ public MessageType parsePartialFrom(byte[] data,
+ ExtensionRegistryLite extensionRegistry)
+ throws InvalidProtocolBufferException {
+ return parsePartialFrom(data, 0, data.length, extensionRegistry);
+ }
+
+ public MessageType parsePartialFrom(byte[] data)
+ throws InvalidProtocolBufferException {
+ return parsePartialFrom(data, 0, data.length, EMPTY_REGISTRY);
+ }
+
+ public MessageType parseFrom(byte[] data, int off, int len,
+ ExtensionRegistryLite extensionRegistry)
+ throws InvalidProtocolBufferException {
+ return checkMessageInitialized(
+ parsePartialFrom(data, off, len, extensionRegistry));
+ }
+
+ public MessageType parseFrom(byte[] data, int off, int len)
+ throws InvalidProtocolBufferException {
+ return parseFrom(data, off, len, EMPTY_REGISTRY);
+ }
+
+ public MessageType parseFrom(byte[] data,
+ ExtensionRegistryLite extensionRegistry)
+ throws InvalidProtocolBufferException {
+ return parseFrom(data, 0, data.length, extensionRegistry);
+ }
+
+ public MessageType parseFrom(byte[] data)
+ throws InvalidProtocolBufferException {
+ return parseFrom(data, EMPTY_REGISTRY);
+ }
+
+ public MessageType parsePartialFrom(InputStream input,
+ ExtensionRegistryLite extensionRegistry)
+ throws InvalidProtocolBufferException {
+ CodedInputStream codedInput = CodedInputStream.newInstance(input);
+ MessageType message = parsePartialFrom(codedInput, extensionRegistry);
+ try {
+ codedInput.checkLastTagWas(0);
+ } catch (InvalidProtocolBufferException e) {
+ throw e.setUnfinishedMessage(message);
+ }
+ return message;
+ }
+
+ public MessageType parsePartialFrom(InputStream input)
+ throws InvalidProtocolBufferException {
+ return parsePartialFrom(input, EMPTY_REGISTRY);
+ }
+
+ public MessageType parseFrom(InputStream input,
+ ExtensionRegistryLite extensionRegistry)
+ throws InvalidProtocolBufferException {
+ return checkMessageInitialized(
+ parsePartialFrom(input, extensionRegistry));
+ }
+
+ public MessageType parseFrom(InputStream input)
+ throws InvalidProtocolBufferException {
+ return parseFrom(input, EMPTY_REGISTRY);
+ }
+
+ public MessageType parsePartialDelimitedFrom(
+ InputStream input,
+ ExtensionRegistryLite extensionRegistry)
+ throws InvalidProtocolBufferException {
+ int size;
+ try {
+ int firstByte = input.read();
+ if (firstByte == -1) {
+ return null;
+ }
+ size = CodedInputStream.readRawVarint32(firstByte, input);
+ } catch (IOException e) {
+ throw new InvalidProtocolBufferException(e.getMessage());
+ }
+ InputStream limitedInput = new LimitedInputStream(input, size);
+ return parsePartialFrom(limitedInput, extensionRegistry);
+ }
+
+ public MessageType parsePartialDelimitedFrom(InputStream input)
+ throws InvalidProtocolBufferException {
+ return parsePartialDelimitedFrom(input, EMPTY_REGISTRY);
+ }
+
+ public MessageType parseDelimitedFrom(
+ InputStream input,
+ ExtensionRegistryLite extensionRegistry)
+ throws InvalidProtocolBufferException {
+ return checkMessageInitialized(
+ parsePartialDelimitedFrom(input, extensionRegistry));
+ }
+
+ public MessageType parseDelimitedFrom(InputStream input)
+ throws InvalidProtocolBufferException {
+ return parseDelimitedFrom(input, EMPTY_REGISTRY);
+ }
+}
diff --git a/java/src/main/java/com/google/protobuf/BoundedByteString.java b/java/src/main/java/com/google/protobuf/BoundedByteString.java
new file mode 100644
index 00000000..cd4982c3
--- /dev/null
+++ b/java/src/main/java/com/google/protobuf/BoundedByteString.java
@@ -0,0 +1,163 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import java.util.NoSuchElementException;
+
+/**
+ * This class is used to represent the substring of a {@link ByteString} over a
+ * single byte array. In terms of the public API of {@link ByteString}, you end
+ * up here by calling {@link ByteString#copyFrom(byte[])} followed by {@link
+ * ByteString#substring(int, int)}.
+ *
+ * <p>This class contains most of the overhead involved in creating a substring
+ * from a {@link LiteralByteString}. The overhead involves some range-checking
+ * and two extra fields.
+ *
+ * @author carlanton@google.com (Carl Haverl)
+ */
+class BoundedByteString extends LiteralByteString {
+
+ // Index in the backing array of the first byte belonging to this substring.
+ private final int bytesOffset;
+ // Number of bytes, starting at bytesOffset, that belong to this substring.
+ private final int bytesLength;
+
+ /**
+ * Creates a {@code BoundedByteString} backed by the sub-range of given array,
+ * without copying.
+ *
+ * @param bytes array to wrap
+ * @param offset index to first byte to use in bytes
+ * @param length number of bytes to use from bytes
+ * @throws IllegalArgumentException if {@code offset < 0}, {@code length < 0},
+ * or if {@code offset + length >
+ * bytes.length}.
+ */
+ BoundedByteString(byte[] bytes, int offset, int length) {
+ super(bytes);
+ if (offset < 0) {
+ throw new IllegalArgumentException("Offset too small: " + offset);
+ }
+ if (length < 0) {
+ // Report the offending length (this message previously printed offset).
+ throw new IllegalArgumentException("Length too small: " + length);
+ }
+ // Widen to long so that offset + length cannot overflow int.
+ if ((long) offset + length > bytes.length) {
+ throw new IllegalArgumentException(
+ "Offset+Length too large: " + offset + "+" + length);
+ }
+
+ this.bytesOffset = offset;
+ this.bytesLength = length;
+ }
+
+ /**
+ * Gets the byte at the given index.
+ * Throws {@link ArrayIndexOutOfBoundsException}
+ * for backwards-compatibility reasons although it would more properly be
+ * {@link IndexOutOfBoundsException}.
+ *
+ * @param index index of byte
+ * @return the value
+ * @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size
+ */
+ @Override
+ public byte byteAt(int index) {
+ // We must check the index ourselves as we cannot rely on Java array index
+ // checking for substrings.
+ if (index < 0) {
+ throw new ArrayIndexOutOfBoundsException("Index too small: " + index);
+ }
+ if (index >= size()) {
+ throw new ArrayIndexOutOfBoundsException(
+ "Index too large: " + index + ", " + size());
+ }
+
+ return bytes[bytesOffset + index];
+ }
+
+ /**
+ * Returns the length of the bounded range given at construction, not the
+ * length of the backing array.
+ */
+ @Override
+ public int size() {
+ return bytesLength;
+ }
+
+ /**
+ * Returns the index in the backing array at which this substring starts.
+ */
+ @Override
+ protected int getOffsetIntoBytes() {
+ return bytesOffset;
+ }
+
+ // =================================================================
+ // ByteString -> byte[]
+
+ /**
+ * Copies {@code numberToCopy} bytes into {@code target}. The source position
+ * is {@code sourceOffset} shifted by this substring's offset into the backing
+ * array. No range checking is performed here.
+ */
+ @Override
+ protected void copyToInternal(byte[] target, int sourceOffset,
+ int targetOffset, int numberToCopy) {
+ System.arraycopy(bytes, getOffsetIntoBytes() + sourceOffset, target,
+ targetOffset, numberToCopy);
+ }
+
+ // =================================================================
+ // ByteIterator
+
+ /**
+ * Returns an iterator over exactly the bytes of this substring.
+ */
+ @Override
+ public ByteIterator iterator() {
+ return new BoundedByteIterator();
+ }
+
+ /**
+ * Iterator that walks the backing array from this substring's offset up to
+ * (but excluding) offset + size. Removal is not supported.
+ */
+ private class BoundedByteIterator implements ByteIterator {
+
+ // Absolute index into the backing array of the next byte to return.
+ private int position;
+ // Absolute index one past the last byte of the substring.
+ private final int limit;
+
+ private BoundedByteIterator() {
+ position = getOffsetIntoBytes();
+ limit = position + size();
+ }
+
+ public boolean hasNext() {
+ return (position < limit);
+ }
+
+ public Byte next() {
+ // Boxing calls Byte.valueOf(byte), which does not instantiate.
+ return nextByte();
+ }
+
+ public byte nextByte() {
+ if (position >= limit) {
+ throw new NoSuchElementException();
+ }
+ return bytes[position++];
+ }
+
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+ }
+}
diff --git a/java/src/main/java/com/google/protobuf/ByteString.java b/java/src/main/java/com/google/protobuf/ByteString.java
index 91356357..1b18169e 100644
--- a/java/src/main/java/com/google/protobuf/ByteString.java
+++ b/java/src/main/java/com/google/protobuf/ByteString.java
@@ -30,140 +30,413 @@
package com.google.protobuf;
-import java.io.InputStream;
-import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
-import java.io.FilterOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Iterator;
import java.util.List;
+import java.util.NoSuchElementException;
/**
- * Immutable array of bytes.
+ * Immutable sequence of bytes. Substring is supported by sharing the reference
+ * to the immutable underlying bytes, as with {@link String}. Concatenation is
+ * likewise supported without copying (long strings) by building a tree of
+ * pieces in {@link RopeByteString}.
+ * <p>
+ * Like {@link String}, the contents of a {@link ByteString} can never be
+ * observed to change, not even in the presence of a data race or incorrect
+ * API usage in the client code.
*
* @author crazybob@google.com Bob Lee
* @author kenton@google.com Kenton Varda
+ * @author carlanton@google.com Carl Haverl
+ * @author martinrb@google.com Martin Buchholz
*/
-public final class ByteString {
- private final byte[] bytes;
+public abstract class ByteString implements Iterable<Byte> {
- private ByteString(final byte[] bytes) {
- this.bytes = bytes;
- }
+ /**
+ * When two strings to be concatenated have a combined length shorter than
+ * this, we just copy their bytes on {@link #concat(ByteString)}.
+ * The trade-off is copy size versus the overhead of creating tree nodes
+ * in {@link RopeByteString}.
+ */
+ static final int CONCATENATE_BY_COPY_SIZE = 128;
+
+ /**
+ * When copying an InputStream into a ByteString with .readFrom(),
+ * the chunks in the underlying rope start at 256 bytes, but double
+ * each iteration up to 8192 bytes.
+ */
+ static final int MIN_READ_FROM_CHUNK_SIZE = 0x100; // 256b
+ static final int MAX_READ_FROM_CHUNK_SIZE = 0x2000; // 8k
/**
- * Gets the byte at the given index.
+ * Empty {@code ByteString}.
+ */
+ public static final ByteString EMPTY = new LiteralByteString(new byte[0]);
+
+ // This constructor is here to prevent subclassing outside of this package.
+ ByteString() {}
+
+ /**
+ * Gets the byte at the given index. This method should be used only for
+ * random access to individual bytes. To access bytes sequentially, use the
+ * {@link ByteIterator} returned by {@link #iterator()}, and call {@link
+ * #substring(int, int)} first if necessary.
*
+ * @param index index of byte
+ * @return the value
* @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size
*/
- public byte byteAt(final int index) {
- return bytes[index];
+ public abstract byte byteAt(int index);
+
+ /**
+ * Return a {@link ByteString.ByteIterator} over the bytes in the ByteString.
+ * To avoid auto-boxing, you may get the iterator manually and call
+ * {@link ByteIterator#nextByte()}.
+ *
+ * @return the iterator
+ */
+ public abstract ByteIterator iterator();
+
+ /**
+ * This interface extends {@code Iterator<Byte>}, so that we can return an
+ * unboxed {@code byte}.
+ */
+ public interface ByteIterator extends Iterator<Byte> {
+ /**
+ * An alternative to {@link Iterator#next()} that returns an
+ * unboxed primitive {@code byte}.
+ *
+ * @return the next {@code byte} in the iteration
+ * @throws NoSuchElementException if the iteration has no more elements
+ */
+ byte nextByte();
}
/**
* Gets the number of bytes.
+ *
+ * @return size in bytes
*/
- public int size() {
- return bytes.length;
- }
+ public abstract int size();
/**
* Returns {@code true} if the size is {@code 0}, {@code false} otherwise.
+ *
+ * @return true if this is zero bytes long
*/
public boolean isEmpty() {
- return bytes.length == 0;
+ return size() == 0;
}
// =================================================================
- // byte[] -> ByteString
+ // ByteString -> substring
/**
- * Empty ByteString.
+ * Return the substring from {@code beginIndex}, inclusive, to the end of the
+ * string.
+ *
+ * @param beginIndex start at this index
+ * @return substring sharing underlying data
+ * @throws IndexOutOfBoundsException if {@code beginIndex < 0} or
+ * {@code beginIndex > size()}.
*/
- public static final ByteString EMPTY = new ByteString(new byte[0]);
+ public ByteString substring(int beginIndex) {
+ // Using size() as the exclusive end index selects everything from
+ // beginIndex through the last byte.
+ return substring(beginIndex, size());
+ }
+
+ /**
+ * Return the substring from {@code beginIndex}, inclusive, to {@code
+ * endIndex}, exclusive.
+ *
+ * @param beginIndex start at this index
+ * @param endIndex the last character is the one before this index
+ * @return substring sharing underlying data
+ * @throws IndexOutOfBoundsException if {@code beginIndex < 0},
+ * {@code endIndex > size()}, or {@code beginIndex > endIndex}.
+ */
+ public abstract ByteString substring(int beginIndex, int endIndex);
+
+ /**
+ * Tests if this bytestring starts with the specified prefix.
+ * Similar to {@link String#startsWith(String)}
+ *
+ * @param prefix the prefix.
+ * @return <code>true</code> if the byte sequence represented by the
+ * argument is a prefix of the byte sequence represented by
+ * this string; <code>false</code> otherwise.
+ */
+ public boolean startsWith(ByteString prefix) {
+ return size() >= prefix.size() &&
+ substring(0, prefix.size()).equals(prefix);
+ }
+
+ // =================================================================
+ // byte[] -> ByteString
/**
* Copies the given bytes into a {@code ByteString}.
+ *
+ * @param bytes source array
+ * @param offset offset in source array
+ * @param size number of bytes to copy
+ * @return new {@code ByteString}
*/
- public static ByteString copyFrom(final byte[] bytes, final int offset,
- final int size) {
- final byte[] copy = new byte[size];
+ public static ByteString copyFrom(byte[] bytes, int offset, int size) {
+ byte[] copy = new byte[size];
System.arraycopy(bytes, offset, copy, 0, size);
- return new ByteString(copy);
+ return new LiteralByteString(copy);
}
/**
* Copies the given bytes into a {@code ByteString}.
+ *
+ * @param bytes to copy
+ * @return new {@code ByteString}
*/
- public static ByteString copyFrom(final byte[] bytes) {
+ public static ByteString copyFrom(byte[] bytes) {
return copyFrom(bytes, 0, bytes.length);
}
/**
- * Copies {@code size} bytes from a {@code java.nio.ByteBuffer} into
+ * Copies the next {@code size} bytes from a {@code java.nio.ByteBuffer} into
* a {@code ByteString}.
+ *
+ * @param bytes source buffer
+ * @param size number of bytes to copy
+ * @return new {@code ByteString}
*/
- public static ByteString copyFrom(final ByteBuffer bytes, final int size) {
- final byte[] copy = new byte[size];
+ public static ByteString copyFrom(ByteBuffer bytes, int size) {
+ byte[] copy = new byte[size];
bytes.get(copy);
- return new ByteString(copy);
+ return new LiteralByteString(copy);
}
/**
* Copies the remaining bytes from a {@code java.nio.ByteBuffer} into
* a {@code ByteString}.
+ *
+ * @param bytes sourceBuffer
+ * @return new {@code ByteString}
*/
- public static ByteString copyFrom(final ByteBuffer bytes) {
+ public static ByteString copyFrom(ByteBuffer bytes) {
return copyFrom(bytes, bytes.remaining());
}
/**
* Encodes {@code text} into a sequence of bytes using the named charset
* and returns the result as a {@code ByteString}.
+ *
+ * @param text source string
+ * @param charsetName encoding to use
+ * @return new {@code ByteString}
+ * @throws UnsupportedEncodingException if the encoding isn't found
*/
- public static ByteString copyFrom(final String text, final String charsetName)
+ public static ByteString copyFrom(String text, String charsetName)
throws UnsupportedEncodingException {
- return new ByteString(text.getBytes(charsetName));
+ return new LiteralByteString(text.getBytes(charsetName));
}
/**
* Encodes {@code text} into a sequence of UTF-8 bytes and returns the
* result as a {@code ByteString}.
+ *
+ * @param text source string
+ * @return new {@code ByteString}
*/
- public static ByteString copyFromUtf8(final String text) {
+ public static ByteString copyFromUtf8(String text) {
try {
- return new ByteString(text.getBytes("UTF-8"));
+ return new LiteralByteString(text.getBytes("UTF-8"));
} catch (UnsupportedEncodingException e) {
throw new RuntimeException("UTF-8 not supported?", e);
}
}
+ // =================================================================
+ // InputStream -> ByteString
+
/**
- * Concatenates all byte strings in the list and returns the result.
+ * Completely reads the given stream's bytes into a
+ * {@code ByteString}, blocking if necessary until all bytes are
+ * read through to the end of the stream.
+ *
+ * <b>Performance notes:</b> The returned {@code ByteString} is an
+ * immutable tree of byte arrays ("chunks") of the stream data. The
+ * first chunk is small, with subsequent chunks each being double
+ * the size, up to 8K. If the caller knows the precise length of
+ * the stream and wishes to avoid all unnecessary copies and
+ * allocations, consider using the two-argument version of this
+ * method, below.
+ *
+ * @param streamToDrain The source stream, which is read completely
+ * but not closed.
+ * @return A new {@code ByteString} which is made up of chunks of
+ * various sizes, depending on the behavior of the underlying
+ * stream.
+ * @throws IOException IOException is thrown if there is a problem
+ * reading the underlying stream.
+ */
+ public static ByteString readFrom(InputStream streamToDrain)
+ throws IOException {
+ // Default sizing: the first chunk is MIN_READ_FROM_CHUNK_SIZE bytes and the
+ // chunk size is capped at MAX_READ_FROM_CHUNK_SIZE.
+ return readFrom(
+ streamToDrain, MIN_READ_FROM_CHUNK_SIZE, MAX_READ_FROM_CHUNK_SIZE);
+ }
+
+ /**
+ * Completely reads the given stream's bytes into a
+ * {@code ByteString}, blocking if necessary until all bytes are
+ * read through to the end of the stream.
+ *
+ * <b>Performance notes:</b> The returned {@code ByteString} is an
+ * immutable tree of byte arrays ("chunks") of the stream data. The
+ * chunkSize parameter sets the size of these byte arrays. In
+ * particular, if the chunkSize is precisely the same as the length
+ * of the stream, unnecessary allocations and copies will be
+ * avoided. Otherwise, the chunks will be of the given size, except
+ * for the last chunk, which will be resized (via a reallocation and
+ * copy) to contain the remainder of the stream.
+ *
+ * @param streamToDrain The source stream, which is read completely
+ * but not closed.
+ * @param chunkSize The size of the chunks in which to read the
+ * stream.
+ * @return A new {@code ByteString} which is made up of chunks of
+ * the given size.
+ * @throws IOException IOException is thrown if there is a problem
+ * reading the underlying stream.
+ */
+ public static ByteString readFrom(InputStream streamToDrain, int chunkSize)
+ throws IOException {
+ // Passing chunkSize as both minimum and maximum keeps the chunk size
+ // constant for the whole read.
+ return readFrom(streamToDrain, chunkSize, chunkSize);
+ }
+
+ /**
+ * Helper method that takes the chunk size range as a parameter. Drains
+ * {@code streamToDrain} into chunks whose size starts at
+ * {@code minChunkSize} and doubles after every chunk, capped at
+ * {@code maxChunkSize}.
+ *
+ * @param streamToDrain the source stream; it is read until
+ * {@code readChunk} reports no more data
+ * @param minChunkSize size of the first chunk buffer
+ * @param maxChunkSize upper bound applied to the chunk size after doubling
+ * @return the result of {@link #copyFrom(Iterable)} over all chunks read
+ * @throws IOException if reading the underlying stream fails
+ */
+ public static ByteString readFrom(InputStream streamToDrain, int minChunkSize,
+ int maxChunkSize) throws IOException {
+ Collection<ByteString> results = new ArrayList<ByteString>();
+
+ // copy the inbound bytes into a list of chunks; the chunk size
+ // grows exponentially to support both short and long streams.
+ int chunkSize = minChunkSize;
+ while (true) {
+ ByteString chunk = readChunk(streamToDrain, chunkSize);
+ if (chunk == null) {
+ break;
+ }
+ results.add(chunk);
+ // Double in long arithmetic: as an int, chunkSize * 2 wraps negative once
+ // chunkSize reaches 2^30, which would request a negative buffer size on
+ // the next iteration. The result never exceeds maxChunkSize, so the
+ // narrowing cast is safe.
+ chunkSize = (int) Math.min((long) chunkSize * 2, (long) maxChunkSize);
+ }
+
+ return ByteString.copyFrom(results);
+ }
+
+ /**
+ * Fills a buffer of {@code chunkSize} bytes from the stream, calling
+ * {@code read()} again whenever a call returns fewer bytes than requested,
+ * and stopping early only when the stream signals end of data.
+ *
+ * @return a {@code ByteString} copy of the bytes actually read (fewer than
+ * {@code chunkSize} if the stream ended first), or {@code null} if no
+ * bytes were read at all.
+ */
+ private static ByteString readChunk(InputStream in, final int chunkSize)
+ throws IOException {
+ final byte[] chunkBuffer = new byte[chunkSize];
+ int filled = 0;
+ int justRead;
+ // Stop when the buffer is full or read() reports end of stream (-1).
+ while (filled < chunkSize
+ && (justRead = in.read(chunkBuffer, filled, chunkSize - filled)) != -1) {
+ filled += justRead;
+ }
+
+ return (filled == 0) ? null : ByteString.copyFrom(chunkBuffer, 0, filled);
+ }
+
+ // =================================================================
+ // Multiple ByteStrings -> One ByteString
+
+ /**
+ * Concatenate the given {@code ByteString} to this one. Short concatenations,
+ * of total size smaller than {@link ByteString#CONCATENATE_BY_COPY_SIZE}, are
+ * produced by copying the underlying bytes (as per Rope.java, <a
+ * href="http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf">
+ * BAP95 </a>). In general, the concatenate involves no copying.
+ *
+ * @param other string to concatenate
+ * @return a new {@code ByteString} instance
+ * @throws IllegalArgumentException if the combined size would reach
+ * {@code Integer.MAX_VALUE}
+ */
+ public ByteString concat(ByteString other) {
+ final int leftSize = size();
+ final int rightSize = other.size();
+ // Sum as long so the size check itself cannot overflow.
+ final long combinedSize = (long) leftSize + rightSize;
+ if (combinedSize < Integer.MAX_VALUE) {
+ return RopeByteString.concatenate(this, other);
+ }
+ throw new IllegalArgumentException("ByteString would be too long: " +
+ leftSize + "+" + rightSize);
+ }
+
+ /**
+ * Concatenates all byte strings in the iterable and returns the result.
+ * This is designed to run in O(list size), not O(total bytes).
*
* <p>The returned {@code ByteString} is not necessarily a unique object.
* If the list is empty, the returned object is the singleton empty
* {@code ByteString}. If the list has only one element, that
* {@code ByteString} will be returned without copying.
+ *
+ * @param byteStrings strings to be concatenated
+ * @return new {@code ByteString}
*/
- public static ByteString copyFrom(List<ByteString> list) {
- if (list.size() == 0) {
- return EMPTY;
- } else if (list.size() == 1) {
- return list.get(0);
+ public static ByteString copyFrom(Iterable<ByteString> byteStrings) {
+ Collection<ByteString> collection;
+ if (!(byteStrings instanceof Collection)) {
+ collection = new ArrayList<ByteString>();
+ for (ByteString byteString : byteStrings) {
+ collection.add(byteString);
+ }
+ } else {
+ collection = (Collection<ByteString>) byteStrings;
}
-
- int size = 0;
- for (ByteString str : list) {
- size += str.size();
+ ByteString result;
+ if (collection.isEmpty()) {
+ result = EMPTY;
+ } else {
+ result = balancedConcat(collection.iterator(), collection.size());
}
- byte[] bytes = new byte[size];
- int pos = 0;
- for (ByteString str : list) {
- System.arraycopy(str.bytes, 0, bytes, pos, str.size());
- pos += str.size();
+ return result;
+ }
+
+ // Internal function used by copyFrom(Iterable<ByteString>).
+ // Create a balanced concatenation of the next "length" elements from the
+ // iterable.
+ private static ByteString balancedConcat(Iterator<ByteString> iterator,
+ int length) {
+ assert length >= 1;
+ ByteString result;
+ if (length == 1) {
+ result = iterator.next();
+ } else {
+ int halfLength = length >>> 1;
+ ByteString left = balancedConcat(iterator, halfLength);
+ ByteString right = balancedConcat(iterator, length - halfLength);
+ result = left.concat(right);
}
- return new ByteString(bytes);
+ return result;
}
// =================================================================
@@ -174,206 +447,446 @@ public final class ByteString {
*
* @param target buffer to copy into
* @param offset in the target buffer
+ * @throws IndexOutOfBoundsException if the offset is negative or too large
*/
- public void copyTo(final byte[] target, final int offset) {
- System.arraycopy(bytes, 0, target, offset, bytes.length);
+ public void copyTo(byte[] target, int offset) {
+ copyTo(target, 0, offset, size());
}
/**
* Copies bytes into a buffer.
*
- * @param target buffer to copy into
+ * @param target buffer to copy into
* @param sourceOffset offset within these bytes
* @param targetOffset offset within the target buffer
- * @param size number of bytes to copy
+ * @param numberToCopy number of bytes to copy
+ * @throws IndexOutOfBoundsException if an offset or size is negative or too
+ * large
*/
- public void copyTo(final byte[] target, final int sourceOffset,
- final int targetOffset,
- final int size) {
- System.arraycopy(bytes, sourceOffset, target, targetOffset, size);
+ public void copyTo(byte[] target, int sourceOffset, int targetOffset,
+ int numberToCopy) {
+ // Validate all arguments up front so that copyToInternal can assume a
+ // well-formed, non-empty request.
+ if (sourceOffset < 0) {
+ throw new IndexOutOfBoundsException("Source offset < 0: " + sourceOffset);
+ }
+ if (targetOffset < 0) {
+ throw new IndexOutOfBoundsException("Target offset < 0: " + targetOffset);
+ }
+ if (numberToCopy < 0) {
+ throw new IndexOutOfBoundsException("Length < 0: " + numberToCopy);
+ }
+ // Compute the end offsets in long arithmetic: as ints, offset + count can
+ // wrap negative and slip past the upper-bound checks below.
+ long sourceEnd = (long) sourceOffset + numberToCopy;
+ if (sourceEnd > size()) {
+ throw new IndexOutOfBoundsException(
+ "Source end offset < 0: " + sourceEnd);
+ }
+ long targetEnd = (long) targetOffset + numberToCopy;
+ if (targetEnd > target.length) {
+ throw new IndexOutOfBoundsException(
+ "Target end offset < 0: " + targetEnd);
+ }
+ // Nothing to do for an empty copy; copyToInternal expects numberToCopy > 0.
+ if (numberToCopy > 0) {
+ copyToInternal(target, sourceOffset, targetOffset, numberToCopy);
+ }
 }
/**
+ * Internal (package private) implementation of
+ * {@link #copyTo(byte[],int,int,int)}.
+ * It assumes that all error checking has already been performed and that
+ * {@code numberToCopy > 0}.
+ */
+ protected abstract void copyToInternal(byte[] target, int sourceOffset,
+ int targetOffset, int numberToCopy);
+
+ /**
* Copies bytes into a ByteBuffer.
*
* @param target ByteBuffer to copy into.
- * @throws ReadOnlyBufferException if the {@code target} is read-only
- * @throws BufferOverflowException if the {@code target}'s remaining()
- * space is not large enough to hold the data.
+ * @throws java.nio.ReadOnlyBufferException if the {@code target} is read-only
+ * @throws java.nio.BufferOverflowException if the {@code target}'s
+ * remaining() space is not large enough to hold the data.
*/
- public void copyTo(ByteBuffer target) {
- target.put(bytes, 0, bytes.length);
- }
+ public abstract void copyTo(ByteBuffer target);
/**
* Copies bytes to a {@code byte[]}.
+ *
+ * @return copied bytes
*/
public byte[] toByteArray() {
- final int size = bytes.length;
- final byte[] copy = new byte[size];
- System.arraycopy(bytes, 0, copy, 0, size);
- return copy;
+ int size = size();
+ byte[] result = new byte[size];
+ copyToInternal(result, 0, 0, size);
+ return result;
}
/**
- * Constructs a new read-only {@code java.nio.ByteBuffer} with the
- * same backing byte array.
+ * Writes the complete contents of this byte string to
+ * the specified output stream argument.
+ *
+ * @param out the output stream to which to write the data.
+ * @throws IOException if an I/O error occurs.
*/
- public ByteBuffer asReadOnlyByteBuffer() {
- final ByteBuffer byteBuffer = ByteBuffer.wrap(bytes);
- return byteBuffer.asReadOnlyBuffer();
- }
+ public abstract void writeTo(OutputStream out) throws IOException;
+
+ /**
+ * Constructs a read-only {@code java.nio.ByteBuffer} whose content
+ * is equal to the contents of this byte string.
+ * The result uses the same backing array as the byte string, if possible.
+ *
+ * @return wrapped bytes
+ */
+ public abstract ByteBuffer asReadOnlyByteBuffer();
+
+ /**
+ * Constructs a list of read-only {@code java.nio.ByteBuffer} objects
+ * such that the concatenation of their contents is equal to the contents
+ * of this byte string. The result uses the same backing arrays as the
+ * byte string.
+ * <p>
+ * By returning a list, implementations of this method may be able to avoid
+ * copying even when there are multiple backing arrays.
+ *
+ * @return a list of wrapped bytes
+ */
+ public abstract List<ByteBuffer> asReadOnlyByteBufferList();
/**
* Constructs a new {@code String} by decoding the bytes using the
* specified charset.
+ *
+ * @param charsetName encode using this charset
+ * @return new string
+ * @throws UnsupportedEncodingException if charset isn't recognized
*/
- public String toString(final String charsetName)
- throws UnsupportedEncodingException {
- return new String(bytes, charsetName);
- }
+ public abstract String toString(String charsetName)
+ throws UnsupportedEncodingException;
+
+ // =================================================================
+ // UTF-8 decoding
/**
* Constructs a new {@code String} by decoding the bytes as UTF-8.
+ *
+ * @return new string using UTF-8 encoding
*/
public String toStringUtf8() {
try {
- return new String(bytes, "UTF-8");
+ return toString("UTF-8");
} catch (UnsupportedEncodingException e) {
throw new RuntimeException("UTF-8 not supported?", e);
}
}
+ /**
+ * Tells whether this {@code ByteString} represents a well-formed UTF-8
+ * byte sequence, such that the original bytes can be converted to a
+ * String object and then round tripped back to bytes without loss.
+ *
+ * <p>More precisely, returns {@code true} whenever: <pre> {@code
+ * Arrays.equals(byteString.toByteArray(),
+ * new String(byteString.toByteArray(), "UTF-8").getBytes("UTF-8"))
+ * }</pre>
+ *
+ * <p>This method returns {@code false} for "overlong" byte sequences,
+ * as well as for 3-byte sequences that would map to a surrogate
+ * character, in accordance with the restricted definition of UTF-8
+ * introduced in Unicode 3.1. Note that the UTF-8 decoder included in
+ * Oracle's JDK has been modified to also reject "overlong" byte
+ * sequences, but (as of 2011) still accepts 3-byte surrogate
+ * character byte sequences.
+ *
+ * <p>See the Unicode Standard,<br>
+ * Table 3-6. <em>UTF-8 Bit Distribution</em>,<br>
+ * Table 3-7. <em>Well Formed UTF-8 Byte Sequences</em>.
+ *
+ * @return whether the bytes in this {@code ByteString} are a
+ * well-formed UTF-8 byte sequence
+ */
+ public abstract boolean isValidUtf8();
+
+ /**
+ * Tells whether the given byte sequence is a well-formed, malformed, or
+ * incomplete UTF-8 byte sequence. This method accepts and returns a partial
+ * state result, allowing the bytes for a complete UTF-8 byte sequence to be
+ * composed from multiple {@code ByteString} segments.
+ *
+ * @param state either {@code 0} (if this is the initial decoding operation)
+ * or the value returned from a call to a partial decoding method for the
+ * previous bytes
+ * @param offset offset of the first byte to check
+ * @param length number of bytes to check
+ *
+ * @return {@code -1} if the partial byte sequence is definitely malformed,
+ * {@code 0} if it is well-formed (no additional input needed), or, if the
+ * byte sequence is "incomplete", i.e. apparently terminated in the middle of
+ * a character, an opaque integer "state" value containing enough information
+ * to decode the character when passed to a subsequent invocation of a
+ * partial decoding method.
+ */
+ protected abstract int partialIsValidUtf8(int state, int offset, int length);
+
// =================================================================
// equals() and hashCode()
@Override
- public boolean equals(final Object o) {
- if (o == this) {
- return true;
- }
-
- if (!(o instanceof ByteString)) {
- return false;
- }
-
- final ByteString other = (ByteString) o;
- final int size = bytes.length;
- if (size != other.bytes.length) {
- return false;
- }
-
- final byte[] thisBytes = bytes;
- final byte[] otherBytes = other.bytes;
- for (int i = 0; i < size; i++) {
- if (thisBytes[i] != otherBytes[i]) {
- return false;
- }
- }
-
- return true;
- }
-
- private volatile int hash = 0;
+ public abstract boolean equals(Object o);
+ /**
+ * Return a non-zero hashCode depending only on the sequence of bytes
+ * in this ByteString.
+ *
+ * @return hashCode value for this object
+ */
@Override
- public int hashCode() {
- int h = hash;
-
- if (h == 0) {
- final byte[] thisBytes = bytes;
- final int size = bytes.length;
-
- h = size;
- for (int i = 0; i < size; i++) {
- h = h * 31 + thisBytes[i];
- }
- if (h == 0) {
- h = 1;
- }
-
- hash = h;
- }
-
- return h;
- }
+ public abstract int hashCode();
// =================================================================
// Input stream
/**
* Creates an {@code InputStream} which can be used to read the bytes.
+ * <p>
+ * The {@link InputStream} returned by this method is guaranteed to be
+ * completely non-blocking. The method {@link InputStream#available()}
+ * returns the number of bytes remaining in the stream. The methods
+ * {@link InputStream#read(byte[])}, {@link InputStream#read(byte[],int,int)}
+ * and {@link InputStream#skip(long)} will read/skip as many bytes as are
+ * available.
+ * <p>
+ * The methods in the returned {@link InputStream} might <b>not</b> be
+ * thread safe.
+ *
+ * @return an input stream that returns the bytes of this byte string.
*/
- public InputStream newInput() {
- return new ByteArrayInputStream(bytes);
- }
+ public abstract InputStream newInput();
/**
* Creates a {@link CodedInputStream} which can be used to read the bytes.
- * Using this is more efficient than creating a {@link CodedInputStream}
- * wrapping the result of {@link #newInput()}.
+ * Using this is often more efficient than creating a {@link CodedInputStream}
+ * that wraps the result of {@link #newInput()}.
+ *
+ * @return stream based on wrapped data
*/
- public CodedInputStream newCodedInput() {
- // We trust CodedInputStream not to modify the bytes, or to give anyone
- // else access to them.
- return CodedInputStream.newInstance(bytes);
- }
+ public abstract CodedInputStream newCodedInput();
// =================================================================
// Output stream
/**
- * Creates a new {@link Output} with the given initial capacity.
+ * Creates a new {@link Output} with the given initial capacity. Call {@link
+ * Output#toByteString()} to create the {@code ByteString} instance.
+ * <p>
+ * A {@link ByteString.Output} offers the same functionality as a
+ * {@link ByteArrayOutputStream}, except that it returns a {@link ByteString}
+ * rather than a {@code byte} array.
+ *
+ * @param initialCapacity estimate of number of bytes to be written
+ * @return {@code OutputStream} for building a {@code ByteString}
*/
- public static Output newOutput(final int initialCapacity) {
- return new Output(new ByteArrayOutputStream(initialCapacity));
+ public static Output newOutput(int initialCapacity) {
+ return new Output(initialCapacity);
}
/**
- * Creates a new {@link Output}.
+ * Creates a new {@link Output}. Call {@link Output#toByteString()} to create
+ * the {@code ByteString} instance.
+ * <p>
+ * A {@link ByteString.Output} offers the same functionality as a
+ * {@link ByteArrayOutputStream}, except that it returns a {@link ByteString}
+ * rather than a {@code byte array}.
+ *
+ * @return {@code OutputStream} for building a {@code ByteString}
*/
public static Output newOutput() {
- return newOutput(32);
+ return new Output(CONCATENATE_BY_COPY_SIZE);
}
/**
* Outputs to a {@code ByteString} instance. Call {@link #toByteString()} to
* create the {@code ByteString} instance.
*/
- public static final class Output extends FilterOutputStream {
- private final ByteArrayOutputStream bout;
+ public static final class Output extends OutputStream {
+ // Implementation note.
+ // The public methods of this class must be synchronized. ByteStrings
+ // are guaranteed to be immutable. Without some sort of locking, it could
+ // be possible for one thread to call toByteString(), while another thread
+ // is still modifying the underlying byte array.
+
+ private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
+ // argument passed by user, indicating initial capacity.
+ private final int initialCapacity;
+ // ByteStrings to be concatenated to create the result
+ private final ArrayList<ByteString> flushedBuffers;
+ // Total number of bytes in the ByteStrings of flushedBuffers
+ private int flushedBuffersTotalBytes;
+ // Current buffer to which we are writing
+ private byte[] buffer;
+ // Location in buffer[] to which we write the next byte.
+ private int bufferPos;
/**
- * Constructs a new output with the given initial capacity.
+ * Creates a new ByteString output stream with the specified
+ * initial capacity.
+ *
+ * @param initialCapacity the initial capacity of the output stream.
*/
- private Output(final ByteArrayOutputStream bout) {
- super(bout);
- this.bout = bout;
+ Output(int initialCapacity) {
+ if (initialCapacity < 0) {
+ throw new IllegalArgumentException("Buffer size < 0");
+ }
+ this.initialCapacity = initialCapacity;
+ this.flushedBuffers = new ArrayList<ByteString>();
+ this.buffer = new byte[initialCapacity];
+ }
+
+ @Override
+ public synchronized void write(int b) {
+ if (bufferPos == buffer.length) {
+ flushFullBuffer(1);
+ }
+ buffer[bufferPos++] = (byte)b;
+ }
+
+ @Override
+ public synchronized void write(byte[] b, int offset, int length) {
+ if (length <= buffer.length - bufferPos) {
+ // The bytes can fit into the current buffer.
+ System.arraycopy(b, offset, buffer, bufferPos, length);
+ bufferPos += length;
+ } else {
+ // Use up the current buffer
+ int copySize = buffer.length - bufferPos;
+ System.arraycopy(b, offset, buffer, bufferPos, copySize);
+ offset += copySize;
+ length -= copySize;
+ // Flush the buffer, and get a new buffer at least big enough to cover
+ // what we still need to output
+ flushFullBuffer(length);
+ System.arraycopy(b, offset, buffer, 0 /* count */, length);
+ bufferPos = length;
+ }
+ }
+
+ /**
+ * Creates a byte string. Its size is the current size of this output
+ * stream and its output has been copied to it.
+ *
+ * @return the current contents of this output stream, as a byte string.
+ */
+ public synchronized ByteString toByteString() {
+ flushLastBuffer();
+ return ByteString.copyFrom(flushedBuffers);
+ }
+
+ /**
+ * Writes the complete contents of this byte string output stream to
+ * the specified output stream argument.
+ *
+ * @param out the output stream to which to write the data.
+ * @throws IOException if an I/O error occurs.
+ */
+ public void writeTo(OutputStream out) throws IOException {
+ ByteString[] cachedFlushBuffers;
+ byte[] cachedBuffer;
+ int cachedBufferPos;
+ synchronized (this) {
+ // Copy the information we need into local variables so as to hold
+ // the lock for as short a time as possible.
+ cachedFlushBuffers =
+ flushedBuffers.toArray(new ByteString[flushedBuffers.size()]);
+ cachedBuffer = buffer;
+ cachedBufferPos = bufferPos;
+ }
+ for (ByteString byteString : cachedFlushBuffers) {
+ byteString.writeTo(out);
+ }
+
+ out.write(Arrays.copyOf(cachedBuffer, cachedBufferPos));
+ }
+
+ /**
+ * Returns the current size of the output stream.
+ *
+ * @return the current size of the output stream
+ */
+ public synchronized int size() {
+ return flushedBuffersTotalBytes + bufferPos;
+ }
+
+ /**
+ * Resets this stream, so that all currently accumulated output in the
+ * output stream is discarded. The output stream can be used again,
+ * reusing the already allocated buffer space.
+ */
+ public synchronized void reset() {
+ flushedBuffers.clear();
+ flushedBuffersTotalBytes = 0;
+ bufferPos = 0;
+ }
+
+ @Override
+ public String toString() {
+ return String.format("<ByteString.Output@%s size=%d>",
+ Integer.toHexString(System.identityHashCode(this)), size());
}
/**
- * Creates a {@code ByteString} instance from this {@code Output}.
+ * Internal function used by writers. The current buffer is full, and the
+ * writer needs a new buffer whose size is at least the specified minimum
+ * size.
*/
- public ByteString toByteString() {
- final byte[] byteArray = bout.toByteArray();
- return new ByteString(byteArray);
+ private void flushFullBuffer(int minSize) {
+ flushedBuffers.add(new LiteralByteString(buffer));
+ flushedBuffersTotalBytes += buffer.length;
+ // We want to increase our total capacity by 50%, but as a minimum,
+ // the new buffer should also at least be >= minSize and
+ // >= initialCapacity.
+ int newSize = Math.max(initialCapacity,
+ Math.max(minSize, flushedBuffersTotalBytes >>> 1));
+ buffer = new byte[newSize];
+ bufferPos = 0;
+ }
+
+ /**
+ * Internal function used by {@link #toByteString()}. The current buffer may
+ * or may not be full, but it needs to be flushed.
+ */
+ private void flushLastBuffer() {
+ if (bufferPos < buffer.length) {
+ if (bufferPos > 0) {
+ byte[] bufferCopy = Arrays.copyOf(buffer, bufferPos);
+ flushedBuffers.add(new LiteralByteString(bufferCopy));
+ }
+ // We reuse this buffer for further writes.
+ } else {
+ // Buffer is completely full. Huzzah.
+ flushedBuffers.add(new LiteralByteString(buffer));
+ // 99% of the time, we're not going to use this OutputStream again.
+ // We set buffer to an empty byte stream so that we're handling this
+ // case without wasting space. In the rare case that more writes
+ // *do* occur, this empty buffer will be flushed and an appropriately
+ // sized new buffer will be created.
+ buffer = EMPTY_BYTE_ARRAY;
+ }
+ flushedBuffersTotalBytes += bufferPos;
+ bufferPos = 0;
}
}
/**
- * Constructs a new ByteString builder, which allows you to efficiently
- * construct a {@code ByteString} by writing to a {@link CodedOutputStream}.
- * Using this is much more efficient than calling {@code newOutput()} and
- * wrapping that in a {@code CodedOutputStream}.
+ * Constructs a new {@code ByteString} builder, which allows you to
+ * efficiently construct a {@code ByteString} by writing to a {@link
+ * CodedOutputStream}. Using this is much more efficient than calling {@code
+ * newOutput()} and wrapping that in a {@code CodedOutputStream}.
*
* <p>This is package-private because it's a somewhat confusing interface.
* Users can call {@link Message#toByteString()} instead of calling this
* directly.
*
- * @param size The target byte size of the {@code ByteString}. You must
- * write exactly this many bytes before building the result.
+ * @param size The target byte size of the {@code ByteString}. You must write
+ * exactly this many bytes before building the result.
+ * @return the builder
*/
- static CodedBuilder newCodedBuilder(final int size) {
+ static CodedBuilder newCodedBuilder(int size) {
return new CodedBuilder(size);
}
@@ -382,7 +895,7 @@ public final class ByteString {
private final CodedOutputStream output;
private final byte[] buffer;
- private CodedBuilder(final int size) {
+ private CodedBuilder(int size) {
buffer = new byte[size];
output = CodedOutputStream.newInstance(buffer);
}
@@ -393,11 +906,57 @@ public final class ByteString {
// We can be confident that the CodedOutputStream will not modify the
// underlying bytes anymore because it already wrote all of them. So,
// no need to make a copy.
- return new ByteString(buffer);
+ return new LiteralByteString(buffer);
}
public CodedOutputStream getCodedOutput() {
return output;
}
}
+
+ // =================================================================
+ // Methods {@link RopeByteString} needs on instances, which aren't part of the
+ // public API.
+
+ /**
+ * Return the depth of the tree representing this {@code ByteString}, if any,
+ * whose root is this node. If this is a leaf node, return 0.
+ *
+ * @return tree depth or zero
+ */
+ protected abstract int getTreeDepth();
+
+ /**
+ * Return {@code true} if this ByteString is literal (a leaf node) or a
+ * flat-enough tree in the sense of {@link RopeByteString}.
+ *
+ * @return true if the tree is flat enough
+ */
+ protected abstract boolean isBalanced();
+
+ /**
+ * Return the cached hash code if available.
+ *
+ * @return value of cached hash code or 0 if not computed yet
+ */
+ protected abstract int peekCachedHashCode();
+
+ /**
+ * Compute the hash across the value bytes starting with the given hash, and
+ * return the result. This is used to compute the hash across strings
+ * represented as a set of pieces by allowing the hash computation to be
+ * continued from piece to piece.
+ *
+ * @param h starting hash value
+ * @param offset offset into this value to start looking at data values
+ * @param length number of data values to include in the hash computation
+ * @return ending hash value
+ */
+ protected abstract int partialHash(int h, int offset, int length);
+
+ @Override
+ public String toString() {
+ return String.format("<ByteString@%s size=%d>",
+ Integer.toHexString(System.identityHashCode(this)), size());
+ }
}
diff --git a/java/src/main/java/com/google/protobuf/CodedInputStream.java b/java/src/main/java/com/google/protobuf/CodedInputStream.java
index b3e08555..33417a7f 100644
--- a/java/src/main/java/com/google/protobuf/CodedInputStream.java
+++ b/java/src/main/java/com/google/protobuf/CodedInputStream.java
@@ -243,6 +243,23 @@ public final class CodedInputStream {
--recursionDepth;
}
+ /** Read a {@code group} field value from the stream. */
+ public <T extends MessageLite> T readGroup(
+ final int fieldNumber,
+ final Parser<T> parser,
+ final ExtensionRegistryLite extensionRegistry)
+ throws IOException {
+ if (recursionDepth >= recursionLimit) {
+ throw InvalidProtocolBufferException.recursionLimitExceeded();
+ }
+ ++recursionDepth;
+ T result = parser.parsePartialFrom(this, extensionRegistry);
+ checkLastTagWas(
+ WireFormat.makeTag(fieldNumber, WireFormat.WIRETYPE_END_GROUP));
+ --recursionDepth;
+ return result;
+ }
+
/**
* Reads a {@code group} field value from the stream and merges it into the
* given {@link UnknownFieldSet}.
@@ -278,6 +295,24 @@ public final class CodedInputStream {
popLimit(oldLimit);
}
+ /** Read an embedded message field value from the stream. */
+ public <T extends MessageLite> T readMessage(
+ final Parser<T> parser,
+ final ExtensionRegistryLite extensionRegistry)
+ throws IOException {
+ int length = readRawVarint32();
+ if (recursionDepth >= recursionLimit) {
+ throw InvalidProtocolBufferException.recursionLimitExceeded();
+ }
+ final int oldLimit = pushLimit(length);
+ ++recursionDepth;
+ T result = parser.parsePartialFrom(this, extensionRegistry);
+ checkLastTagWas(0);
+ --recursionDepth;
+ popLimit(oldLimit);
+ return result;
+ }
+
/** Read a {@code bytes} field value from the stream. */
public ByteString readBytes() throws IOException {
final int size = readRawVarint32();
@@ -601,7 +636,7 @@ public final class CodedInputStream {
* refreshing its buffer. If you need to prevent reading past a certain
* point in the underlying {@code InputStream} (e.g. because you expect it to
* contain more data after the end of the message which you need to handle
- * differently) then you must place a wrapper around you {@code InputStream}
+ * differently) then you must place a wrapper around your {@code InputStream}
* which limits the amount of data that can be read from it.
*
* @return the old limit.
@@ -676,7 +711,7 @@ public final class CodedInputStream {
/**
* Called with {@code this.buffer} is empty to read more bytes from the
- * input. If {@code mustSucceed} is true, refillBuffer() gurantees that
+ * input. If {@code mustSucceed} is true, refillBuffer() guarantees that
* either there will be at least one byte in the buffer when it returns
* or it will throw an exception. If {@code mustSucceed} is false,
* refillBuffer() returns false if no more bytes were available.
@@ -879,7 +914,7 @@ public final class CodedInputStream {
refillBuffer(true);
}
- bufferPos = size - pos;
+ bufferPos = size - pos;
}
}
}
diff --git a/java/src/main/java/com/google/protobuf/CodedOutputStream.java b/java/src/main/java/com/google/protobuf/CodedOutputStream.java
index ac5f2d30..ca24638d 100644
--- a/java/src/main/java/com/google/protobuf/CodedOutputStream.java
+++ b/java/src/main/java/com/google/protobuf/CodedOutputStream.java
@@ -30,10 +30,10 @@
package com.google.protobuf;
-import java.io.OutputStream;
import java.io.IOException;
-import java.io.UnsupportedEncodingException;
import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
/**
* Encodes and writes protocol message fields.
@@ -540,6 +540,15 @@ public final class CodedOutputStream {
}
/**
+ * Compute the number of bytes that would be needed to encode an
+ * embedded message in a lazy field, including tag.
+ */
+ public static int computeLazyFieldSize(final int fieldNumber,
+ final LazyField value) {
+ return computeTagSize(fieldNumber) + computeLazyFieldSizeNoTag(value);
+ }
+
+ /**
* Compute the number of bytes that would be needed to encode a
* {@code uint32} field, including tag.
*/
@@ -614,6 +623,18 @@ public final class CodedOutputStream {
computeBytesSize(WireFormat.MESSAGE_SET_MESSAGE, value);
}
+ /**
+ * Compute the number of bytes that would be needed to encode a
+ * lazily parsed MessageSet extension field to the stream. For
+ * historical reasons, the wire format differs from normal fields.
+ */
+ public static int computeLazyFieldMessageSetExtensionSize(
+ final int fieldNumber, final LazyField value) {
+ return computeTagSize(WireFormat.MESSAGE_SET_ITEM) * 2 +
+ computeUInt32Size(WireFormat.MESSAGE_SET_TYPE_ID, fieldNumber) +
+ computeLazyFieldSize(WireFormat.MESSAGE_SET_MESSAGE, value);
+ }
+
// -----------------------------------------------------------------
/**
@@ -730,6 +751,15 @@ public final class CodedOutputStream {
}
/**
+ * Compute the number of bytes that would be needed to encode an embedded
+ * message stored in a lazy field.
+ */
+ public static int computeLazyFieldSizeNoTag(final LazyField value) {
+ final int size = value.getSerializedSize();
+ return computeRawVarint32Size(size) + size;
+ }
+
+ /**
* Compute the number of bytes that would be needed to encode a
* {@code bytes} field.
*/
diff --git a/java/src/main/java/com/google/protobuf/Descriptors.java b/java/src/main/java/com/google/protobuf/Descriptors.java
index 2ee84594..a4913053 100644
--- a/java/src/main/java/com/google/protobuf/Descriptors.java
+++ b/java/src/main/java/com/google/protobuf/Descriptors.java
@@ -35,8 +35,10 @@ import com.google.protobuf.DescriptorProtos.*;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import java.io.UnsupportedEncodingException;
/**
@@ -106,6 +108,11 @@ public final class Descriptors {
return Collections.unmodifiableList(Arrays.asList(dependencies));
}
+ /** Get a list of this file's public dependencies (public imports). */
+ public List<FileDescriptor> getPublicDependencies() {
+ return Collections.unmodifiableList(Arrays.asList(publicDependencies));
+ }
+
/**
* Find a message type in the file by name. Does not find nested types.
*
@@ -216,7 +223,7 @@ public final class Descriptors {
public static FileDescriptor buildFrom(final FileDescriptorProto proto,
final FileDescriptor[] dependencies)
throws DescriptorValidationException {
- // Building decsriptors involves two steps: translating and linking.
+ // Building descriptors involves two steps: translating and linking.
// In the translation step (implemented by FileDescriptor's
// constructor), we build an object tree mirroring the
// FileDescriptorProto's tree and put all of the descriptors into the
@@ -317,12 +324,12 @@ public final class Descriptors {
* {@link FileDescriptor#internalBuildGeneratedFileFrom}, the caller
* provides a callback implementing this interface. The callback is called
* after the FileDescriptor has been constructed, in order to assign all
- * the global variales defined in the generated code which point at parts
+ * the global variables defined in the generated code which point at parts
* of the FileDescriptor. The callback returns an ExtensionRegistry which
* contains any extensions which might be used in the descriptor -- that
* is, extensions of the various "Options" messages defined in
* descriptor.proto. The callback may also return null to indicate that
- * no extensions are used in the decsriptor.
+ * no extensions are used in the descriptor.
*/
public interface InternalDescriptorAssigner {
ExtensionRegistry assignDescriptors(FileDescriptor root);
@@ -334,6 +341,7 @@ public final class Descriptors {
private final ServiceDescriptor[] services;
private final FieldDescriptor[] extensions;
private final FileDescriptor[] dependencies;
+ private final FileDescriptor[] publicDependencies;
private final DescriptorPool pool;
private FileDescriptor(final FileDescriptorProto proto,
@@ -343,6 +351,17 @@ public final class Descriptors {
this.pool = pool;
this.proto = proto;
this.dependencies = dependencies.clone();
+ this.publicDependencies =
+ new FileDescriptor[proto.getPublicDependencyCount()];
+ for (int i = 0; i < proto.getPublicDependencyCount(); i++) {
+ int index = proto.getPublicDependency(i);
+ if (index < 0 || index >= this.dependencies.length) {
+ throw new DescriptorValidationException(this,
+ "Invalid public dependency index.");
+ }
+ this.publicDependencies[i] =
+ this.dependencies[proto.getPublicDependency(i)];
+ }
pool.addPackage(getPackage(), this);
@@ -390,7 +409,7 @@ public final class Descriptors {
* in the original. This method is needed for bootstrapping when a file
* defines custom options. The options may be defined in the file itself,
* so we can't actually parse them until we've constructed the descriptors,
- * but to construct the decsriptors we have to have parsed the descriptor
+ * but to construct the descriptors we have to have parsed the descriptor
* protos. So, we have to parse the descriptor protos a second time after
* constructing the descriptors.
*/
@@ -641,7 +660,7 @@ public final class Descriptors {
FieldSet.FieldDescriptorLite<FieldDescriptor> {
/**
* Get the index of this descriptor within its parent.
- * @see Descriptor#getIndex()
+ * @see Descriptors.Descriptor#getIndex()
*/
public int getIndex() { return index; }
@@ -656,7 +675,7 @@ public final class Descriptors {
/**
* Get the field's fully-qualified name.
- * @see Descriptor#getFullName()
+ * @see Descriptors.Descriptor#getFullName()
*/
public String getFullName() { return fullName; }
@@ -943,7 +962,8 @@ public final class Descriptors {
private void crossLink() throws DescriptorValidationException {
if (proto.hasExtendee()) {
final GenericDescriptor extendee =
- file.pool.lookupSymbol(proto.getExtendee(), this);
+ file.pool.lookupSymbol(proto.getExtendee(), this,
+ DescriptorPool.SearchFilter.TYPES_ONLY);
if (!(extendee instanceof Descriptor)) {
throw new DescriptorValidationException(this,
'\"' + proto.getExtendee() + "\" is not a message type.");
@@ -960,7 +980,8 @@ public final class Descriptors {
if (proto.hasTypeName()) {
final GenericDescriptor typeDescriptor =
- file.pool.lookupSymbol(proto.getTypeName(), this);
+ file.pool.lookupSymbol(proto.getTypeName(), this,
+ DescriptorPool.SearchFilter.TYPES_ONLY);
if (!proto.hasType()) {
// Choose field type based on symbol.
@@ -1149,7 +1170,7 @@ public final class Descriptors {
implements GenericDescriptor, Internal.EnumLiteMap<EnumValueDescriptor> {
/**
* Get the index of this descriptor within its parent.
- * @see Descriptor#getIndex()
+ * @see Descriptors.Descriptor#getIndex()
*/
public int getIndex() { return index; }
@@ -1161,7 +1182,7 @@ public final class Descriptors {
/**
* Get the type's fully-qualified name.
- * @see Descriptor#getFullName()
+ * @see Descriptors.Descriptor#getFullName()
*/
public String getFullName() { return fullName; }
@@ -1182,7 +1203,7 @@ public final class Descriptors {
/**
* Find an enum value by name.
* @param name The unqualified name of the value (e.g. "FOO").
- * @return the value's decsriptor, or {@code null} if not found.
+ * @return the value's descriptor, or {@code null} if not found.
*/
public EnumValueDescriptor findValueByName(final String name) {
final GenericDescriptor result =
@@ -1198,7 +1219,7 @@ public final class Descriptors {
* Find an enum value by number. If multiple enum values have the same
* number, this returns the first defined value with that number.
* @param number The value's number.
- * @return the value's decsriptor, or {@code null} if not found.
+ * @return the value's descriptor, or {@code null} if not found.
*/
public EnumValueDescriptor findValueByNumber(final int number) {
return file.pool.enumValuesByNumber.get(
@@ -1261,7 +1282,7 @@ public final class Descriptors {
implements GenericDescriptor, Internal.EnumLite {
/**
* Get the index of this descriptor within its parent.
- * @see Descriptor#getIndex()
+ * @see Descriptors.Descriptor#getIndex()
*/
public int getIndex() { return index; }
@@ -1276,7 +1297,7 @@ public final class Descriptors {
/**
* Get the value's fully-qualified name.
- * @see Descriptor#getFullName()
+ * @see Descriptors.Descriptor#getFullName()
*/
public String getFullName() { return fullName; }
@@ -1337,7 +1358,7 @@ public final class Descriptors {
/**
* Get the type's fully-qualified name.
- * @see Descriptor#getFullName()
+ * @see Descriptors.Descriptor#getFullName()
*/
public String getFullName() { return fullName; }
@@ -1355,7 +1376,7 @@ public final class Descriptors {
/**
* Find a method by name.
* @param name The unqualified name of the method (e.g. "Foo").
- * @return the method's decsriptor, or {@code null} if not found.
+ * @return the method's descriptor, or {@code null} if not found.
*/
public MethodDescriptor findMethodByName(final String name) {
final GenericDescriptor result =
@@ -1427,7 +1448,7 @@ public final class Descriptors {
/**
* Get the method's fully-qualified name.
- * @see Descriptor#getFullName()
+ * @see Descriptors.Descriptor#getFullName()
*/
public String getFullName() { return fullName; }
@@ -1475,7 +1496,8 @@ public final class Descriptors {
private void crossLink() throws DescriptorValidationException {
final GenericDescriptor input =
- file.pool.lookupSymbol(proto.getInputType(), this);
+ file.pool.lookupSymbol(proto.getInputType(), this,
+ DescriptorPool.SearchFilter.TYPES_ONLY);
if (!(input instanceof Descriptor)) {
throw new DescriptorValidationException(this,
'\"' + proto.getInputType() + "\" is not a message type.");
@@ -1483,7 +1505,8 @@ public final class Descriptors {
inputType = (Descriptor)input;
final GenericDescriptor output =
- file.pool.lookupSymbol(proto.getOutputType(), this);
+ file.pool.lookupSymbol(proto.getOutputType(), this,
+ DescriptorPool.SearchFilter.TYPES_ONLY);
if (!(output instanceof Descriptor)) {
throw new DescriptorValidationException(this,
'\"' + proto.getOutputType() + "\" is not a message type.");
@@ -1535,7 +1558,7 @@ public final class Descriptors {
public String getProblemSymbolName() { return name; }
/**
- * Gets the the protocol message representation of the invalid descriptor.
+ * Gets the protocol message representation of the invalid descriptor.
*/
public Message getProblemProto() { return proto; }
@@ -1590,14 +1613,22 @@ public final class Descriptors {
* descriptors defined in a particular file.
*/
private static final class DescriptorPool {
+
+ /** Defines what subclass of descriptors to search in the descriptor pool.
+ */
+ enum SearchFilter {
+ TYPES_ONLY, AGGREGATES_ONLY, ALL_SYMBOLS
+ }
+
DescriptorPool(final FileDescriptor[] dependencies) {
- this.dependencies = new DescriptorPool[dependencies.length];
+ this.dependencies = new HashSet<FileDescriptor>();
- for (int i = 0; i < dependencies.length; i++) {
- this.dependencies[i] = dependencies[i].pool;
+ for (int i = 0; i < dependencies.length; i++) {
+ this.dependencies.add(dependencies[i]);
+ importPublicDependencies(dependencies[i]);
}
- for (final FileDescriptor dependency : dependencies) {
+ for (final FileDescriptor dependency : this.dependencies) {
try {
addPackage(dependency.getPackage(), dependency);
} catch (DescriptorValidationException e) {
@@ -1609,7 +1640,16 @@ public final class Descriptors {
}
}
- private final DescriptorPool[] dependencies;
+ /** Find and put public dependencies of the file into dependencies set.*/
+ private void importPublicDependencies(final FileDescriptor file) {
+ for (FileDescriptor dependency : file.getPublicDependencies()) {
+ if (dependencies.add(dependency)) {
+ importPublicDependencies(dependency);
+ }
+ }
+ }
+
+ private final Set<FileDescriptor> dependencies;
private final Map<String, GenericDescriptor> descriptorsByName =
new HashMap<String, GenericDescriptor>();
@@ -1620,39 +1660,81 @@ public final class Descriptors {
/** Find a generic descriptor by fully-qualified name. */
GenericDescriptor findSymbol(final String fullName) {
+ return findSymbol(fullName, SearchFilter.ALL_SYMBOLS);
+ }
+
+ /** Find a descriptor by fully-qualified name, returning only a
+ * descriptor that is accepted by the given search filter.
+ */
+ GenericDescriptor findSymbol(final String fullName,
+ final SearchFilter filter) {
GenericDescriptor result = descriptorsByName.get(fullName);
if (result != null) {
- return result;
+ if ((filter==SearchFilter.ALL_SYMBOLS) ||
+ ((filter==SearchFilter.TYPES_ONLY) && isType(result)) ||
+ ((filter==SearchFilter.AGGREGATES_ONLY) && isAggregate(result))) {
+ return result;
+ }
}
- for (final DescriptorPool dependency : dependencies) {
- result = dependency.descriptorsByName.get(fullName);
+ for (final FileDescriptor dependency : dependencies) {
+ result = dependency.pool.descriptorsByName.get(fullName);
if (result != null) {
- return result;
+ if ((filter==SearchFilter.ALL_SYMBOLS) ||
+ ((filter==SearchFilter.TYPES_ONLY) && isType(result)) ||
+ ((filter==SearchFilter.AGGREGATES_ONLY) && isAggregate(result))) {
+ return result;
+ }
}
}
return null;
}
+ /** Checks if the descriptor is a valid type for a message field. */
+ boolean isType(GenericDescriptor descriptor) {
+ return (descriptor instanceof Descriptor) ||
+ (descriptor instanceof EnumDescriptor);
+ }
+
+ /** Checks if the descriptor is a valid namespace type. */
+ boolean isAggregate(GenericDescriptor descriptor) {
+ return (descriptor instanceof Descriptor) ||
+ (descriptor instanceof EnumDescriptor) ||
+ (descriptor instanceof PackageDescriptor) ||
+ (descriptor instanceof ServiceDescriptor);
+ }
+
/**
- * Look up a descriptor by name, relative to some other descriptor.
+ * Look up a type descriptor by name, relative to some other descriptor.
* The name may be fully-qualified (with a leading '.'),
* partially-qualified, or unqualified. C++-like name lookup semantics
* are used to search for the matching descriptor.
*/
GenericDescriptor lookupSymbol(final String name,
- final GenericDescriptor relativeTo)
+ final GenericDescriptor relativeTo,
+ final DescriptorPool.SearchFilter filter)
throws DescriptorValidationException {
// TODO(kenton): This could be optimized in a number of ways.
GenericDescriptor result;
if (name.startsWith(".")) {
// Fully-qualified name.
- result = findSymbol(name.substring(1));
+ result = findSymbol(name.substring(1), filter);
} else {
// If "name" is a compound identifier, we want to search for the
// first component of it, then search within it for the rest.
+ // If name is something like "Foo.Bar.baz", and symbols named "Foo" are
+ // defined in multiple parent scopes, we only want to find "Bar.baz" in
+ // the innermost one. E.g., the following should produce an error:
+ // message Bar { message Baz {} }
+ // message Foo {
+ // message Bar {
+ // }
+ // optional Bar.Baz baz = 1;
+ // }
+ // So, we look for just "Foo" first, then look for "Bar.baz" within it
+ // if found.
final int firstPartLength = name.indexOf('.');
final String firstPart;
if (firstPartLength == -1) {
@@ -1670,14 +1752,15 @@ public final class Descriptors {
// Chop off the last component of the scope.
final int dotpos = scopeToTry.lastIndexOf(".");
if (dotpos == -1) {
- result = findSymbol(name);
+ result = findSymbol(name, filter);
break;
} else {
scopeToTry.setLength(dotpos + 1);
- // Append firstPart and try to find.
+ // Append firstPart and try to find
scopeToTry.append(firstPart);
- result = findSymbol(scopeToTry.toString());
+ result = findSymbol(scopeToTry.toString(),
+ DescriptorPool.SearchFilter.AGGREGATES_ONLY);
if (result != null) {
if (firstPartLength != -1) {
@@ -1686,7 +1769,7 @@ public final class Descriptors {
// searching parent scopes.
scopeToTry.setLength(dotpos + 1);
scopeToTry.append(name);
- result = findSymbol(scopeToTry.toString());
+ result = findSymbol(scopeToTry.toString(), filter);
}
break;
}
@@ -1817,7 +1900,7 @@ public final class Descriptors {
/**
* Adds a field to the fieldsByNumber table. Throws an exception if a
- * field with hte same containing type and number already exists.
+ * field with the same containing type and number already exists.
*/
void addFieldByNumber(final FieldDescriptor field)
throws DescriptorValidationException {
diff --git a/java/src/main/java/com/google/protobuf/DynamicMessage.java b/java/src/main/java/com/google/protobuf/DynamicMessage.java
index c106b662..c0c9fc94 100644
--- a/java/src/main/java/com/google/protobuf/DynamicMessage.java
+++ b/java/src/main/java/com/google/protobuf/DynamicMessage.java
@@ -35,6 +35,7 @@ import com.google.protobuf.Descriptors.FieldDescriptor;
import java.io.InputStream;
import java.io.IOException;
+import java.util.Collections;
import java.util.Map;
/**
@@ -160,7 +161,9 @@ public final class DynamicMessage extends AbstractMessage {
verifyContainingType(field);
Object result = fields.getField(field);
if (result == null) {
- if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
+ if (field.isRepeated()) {
+ result = Collections.emptyList();
+ } else if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
result = getDefaultInstance(field.getMessageType());
} else {
result = field.getDefaultValue();
@@ -198,10 +201,12 @@ public final class DynamicMessage extends AbstractMessage {
return fields.isInitialized();
}
+ @Override
public boolean isInitialized() {
return isInitialized(type, fields);
}
+ @Override
public void writeTo(CodedOutputStream output) throws IOException {
if (type.getOptions().getMessageSetWireFormat()) {
fields.writeMessageSetTo(output);
@@ -212,6 +217,7 @@ public final class DynamicMessage extends AbstractMessage {
}
}
+ @Override
public int getSerializedSize() {
int size = memoizedSize;
if (size != -1) return size;
@@ -236,6 +242,26 @@ public final class DynamicMessage extends AbstractMessage {
return newBuilderForType().mergeFrom(this);
}
+ public Parser<DynamicMessage> getParserForType() {
+ return new AbstractParser<DynamicMessage>() {
+ public DynamicMessage parsePartialFrom(
+ CodedInputStream input,
+ ExtensionRegistryLite extensionRegistry)
+ throws InvalidProtocolBufferException {
+ Builder builder = newBuilder(type);
+ try {
+ builder.mergeFrom(input, extensionRegistry);
+ } catch (InvalidProtocolBufferException e) {
+ throw e.setUnfinishedMessage(builder.buildPartial());
+ } catch (IOException e) {
+ throw new InvalidProtocolBufferException(e.getMessage())
+ .setUnfinishedMessage(builder.buildPartial());
+ }
+ return builder.buildPartial();
+ }
+ };
+ }
+
/** Verifies that the field is a field of this message. */
private void verifyContainingType(FieldDescriptor field) {
if (field.getContainingType() != type) {
@@ -264,14 +290,18 @@ public final class DynamicMessage extends AbstractMessage {
// ---------------------------------------------------------------
// Implementation of Message.Builder interface.
+ @Override
public Builder clear() {
- if (fields == null) {
- throw new IllegalStateException("Cannot call clear() after build().");
+ if (fields.isImmutable()) {
+ fields = FieldSet.newFieldSet();
+ } else {
+ fields.clear();
}
- fields.clear();
+ unknownFields = UnknownFieldSet.getDefaultInstance();
return this;
}
+ @Override
public Builder mergeFrom(Message other) {
if (other instanceof DynamicMessage) {
// This should be somewhat faster than calling super.mergeFrom().
@@ -280,6 +310,7 @@ public final class DynamicMessage extends AbstractMessage {
throw new IllegalArgumentException(
"mergeFrom(Message) can only merge messages of the same type.");
}
+ ensureIsMutable();
fields.mergeFrom(otherDynamicMessage.fields);
mergeUnknownFields(otherDynamicMessage.unknownFields);
return this;
@@ -289,8 +320,7 @@ public final class DynamicMessage extends AbstractMessage {
}
public DynamicMessage build() {
- // If fields == null, we'll throw an appropriate exception later.
- if (fields != null && !isInitialized()) {
+ if (!isInitialized()) {
throw newUninitializedMessageException(
new DynamicMessage(type, fields, unknownFields));
}
@@ -312,21 +342,17 @@ public final class DynamicMessage extends AbstractMessage {
}
public DynamicMessage buildPartial() {
- if (fields == null) {
- throw new IllegalStateException(
- "build() has already been called on this Builder.");
- }
fields.makeImmutable();
DynamicMessage result =
new DynamicMessage(type, fields, unknownFields);
- fields = null;
- unknownFields = null;
return result;
}
+ @Override
public Builder clone() {
Builder result = new Builder(type);
result.fields.mergeFrom(fields);
+ result.mergeUnknownFields(unknownFields);
return result;
}
@@ -377,12 +403,14 @@ public final class DynamicMessage extends AbstractMessage {
public Builder setField(FieldDescriptor field, Object value) {
verifyContainingType(field);
+ ensureIsMutable();
fields.setField(field, value);
return this;
}
public Builder clearField(FieldDescriptor field) {
verifyContainingType(field);
+ ensureIsMutable();
fields.clearField(field);
return this;
}
@@ -400,12 +428,14 @@ public final class DynamicMessage extends AbstractMessage {
public Builder setRepeatedField(FieldDescriptor field,
int index, Object value) {
verifyContainingType(field);
+ ensureIsMutable();
fields.setRepeatedField(field, index, value);
return this;
}
public Builder addRepeatedField(FieldDescriptor field, Object value) {
verifyContainingType(field);
+ ensureIsMutable();
fields.addRepeatedField(field, value);
return this;
}
@@ -419,6 +449,7 @@ public final class DynamicMessage extends AbstractMessage {
return this;
}
+ @Override
public Builder mergeUnknownFields(UnknownFieldSet unknownFields) {
this.unknownFields =
UnknownFieldSet.newBuilder(this.unknownFields)
@@ -434,5 +465,18 @@ public final class DynamicMessage extends AbstractMessage {
"FieldDescriptor does not match message type.");
}
}
+
+ private void ensureIsMutable() {
+ if (fields.isImmutable()) {
+ fields = fields.clone();
+ }
+ }
+
+ @Override
+ public com.google.protobuf.Message.Builder getFieldBuilder(FieldDescriptor field) {
+ // TODO(xiangl): need implementation for dynamic message
+ throw new UnsupportedOperationException(
+ "getFieldBuilder() called on a dynamic message type.");
+ }
}
}
diff --git a/java/src/main/java/com/google/protobuf/ExtensionRegistryLite.java b/java/src/main/java/com/google/protobuf/ExtensionRegistryLite.java
index d5288dd8..1e1289d0 100644
--- a/java/src/main/java/com/google/protobuf/ExtensionRegistryLite.java
+++ b/java/src/main/java/com/google/protobuf/ExtensionRegistryLite.java
@@ -43,7 +43,7 @@ import java.util.Map;
* make sense to mix the two, since if you have any regular types in your
* program, you then require the full runtime and lose all the benefits of
* the lite runtime, so you might as well make all your types be regular types.
- * However, in some cases (e.g. when depending on multiple third-patry libraries
+ * However, in some cases (e.g. when depending on multiple third-party libraries
* where one uses lite types and one uses regular), you may find yourself
* wanting to mix the two. In this case things get more complicated.
* <p>
@@ -71,6 +71,22 @@ import java.util.Map;
* @author kenton@google.com Kenton Varda
*/
public class ExtensionRegistryLite {
+
+ // Global switch between eager and lazy parsing of MessageSet extensions;
+ // judging by its name, true selects eager parsing (lazy parsing disabled).
+ //
+ // TODO(xiangl): A single global flag may be too crude for some
+ // applications. Support choosing lazy parsing at a finer granularity.
+ private static volatile boolean eagerlyParseMessageSets = false;
+
+ public static boolean isEagerlyParseMessageSets() {
+ return eagerlyParseMessageSets;
+ }
+
+ public static void setEagerlyParseMessageSets(boolean isEagerlyParse) {
+ eagerlyParseMessageSets = isEagerlyParse;
+ }
+
/** Construct a new, empty instance. */
public static ExtensionRegistryLite newInstance() {
return new ExtensionRegistryLite();
diff --git a/java/src/main/java/com/google/protobuf/FieldSet.java b/java/src/main/java/com/google/protobuf/FieldSet.java
index a85dbaa6..2663694f 100644
--- a/java/src/main/java/com/google/protobuf/FieldSet.java
+++ b/java/src/main/java/com/google/protobuf/FieldSet.java
@@ -30,12 +30,14 @@
package com.google.protobuf;
+import com.google.protobuf.LazyField.LazyIterator;
+
+import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
-import java.io.IOException;
/**
* A class which represents an arbitrary set of fields of some message type.
@@ -68,6 +70,7 @@ final class FieldSet<FieldDescriptorType extends
private final SmallSortedMap<FieldDescriptorType, Object> fields;
private boolean isImmutable;
+ private boolean hasLazyField = false;
/** Construct a new FieldSet. */
private FieldSet() {
@@ -95,7 +98,7 @@ final class FieldSet<FieldDescriptorType extends
FieldSet<T> emptySet() {
return DEFAULT_INSTANCE;
}
- @SuppressWarnings("unchecked")
+ @SuppressWarnings("rawtypes")
private static final FieldSet DEFAULT_INSTANCE = new FieldSet(true);
/** Make this FieldSet immutable from this point forward. */
@@ -109,7 +112,7 @@ final class FieldSet<FieldDescriptorType extends
}
/**
- * Retuns whether the FieldSet is immutable. This is true if it is the
+ * Returns whether the FieldSet is immutable. This is true if it is the
* {@link #emptySet} or if {@link #makeImmutable} were called.
*
* @return whether the FieldSet is immutable.
@@ -139,6 +142,7 @@ final class FieldSet<FieldDescriptorType extends
FieldDescriptorType descriptor = entry.getKey();
clone.setField(descriptor, entry.getValue());
}
+ clone.hasLazyField = hasLazyField;
return clone;
}
@@ -147,21 +151,52 @@ final class FieldSet<FieldDescriptorType extends
/** See {@link Message.Builder#clear()}. */
public void clear() {
fields.clear();
+ hasLazyField = false;
}
/**
* Get a simple map containing all the fields.
*/
public Map<FieldDescriptorType, Object> getAllFields() {
+ if (hasLazyField) {
+ SmallSortedMap<FieldDescriptorType, Object> result =
+ SmallSortedMap.newFieldMap(16);
+ for (int i = 0; i < fields.getNumArrayEntries(); i++) {
+ cloneFieldEntry(result, fields.getArrayEntryAt(i));
+ }
+ for (Map.Entry<FieldDescriptorType, Object> entry :
+ fields.getOverflowEntries()) {
+ cloneFieldEntry(result, entry);
+ }
+ if (fields.isImmutable()) {
+ result.makeImmutable();
+ }
+ return result;
+ }
return fields.isImmutable() ? fields : Collections.unmodifiableMap(fields);
}
+ private void cloneFieldEntry(Map<FieldDescriptorType, Object> map,
+ Map.Entry<FieldDescriptorType, Object> entry) {
+ FieldDescriptorType key = entry.getKey();
+ Object value = entry.getValue();
+ if (value instanceof LazyField) {
+ map.put(key, ((LazyField) value).getValue());
+ } else {
+ map.put(key, value);
+ }
+ }
+
/**
* Get an iterator to the field map. This iterator should not be leaked out
- * of the protobuf library as it is not protected from mutation when
- * fields is not immutable.
+ * of the protobuf library as it is not protected from mutation when fields
+ * is not immutable.
*/
public Iterator<Map.Entry<FieldDescriptorType, Object>> iterator() {
+ if (hasLazyField) {
+ return new LazyIterator<FieldDescriptorType>(
+ fields.entrySet().iterator());
+ }
return fields.entrySet().iterator();
}
@@ -185,14 +220,18 @@ final class FieldSet<FieldDescriptorType extends
* to the caller to fetch the field's default value.
*/
public Object getField(final FieldDescriptorType descriptor) {
- return fields.get(descriptor);
+ Object o = fields.get(descriptor);
+ if (o instanceof LazyField) {
+ return ((LazyField) o).getValue();
+ }
+ return o;
}
/**
* Useful for implementing
* {@link Message.Builder#setField(Descriptors.FieldDescriptor,Object)}.
*/
- @SuppressWarnings("unchecked")
+ @SuppressWarnings({"unchecked", "rawtypes"})
public void setField(final FieldDescriptorType descriptor,
Object value) {
if (descriptor.isRepeated()) {
@@ -204,7 +243,7 @@ final class FieldSet<FieldDescriptorType extends
// Wrap the contents in a new list so that the caller cannot change
// the list's contents after setting it.
final List newList = new ArrayList();
- newList.addAll((List)value);
+ newList.addAll((List) value);
for (final Object element : newList) {
verifyType(descriptor.getLiteType(), element);
}
@@ -213,6 +252,9 @@ final class FieldSet<FieldDescriptorType extends
verifyType(descriptor.getLiteType(), value);
}
+ if (value instanceof LazyField) {
+ hasLazyField = true;
+ }
fields.put(descriptor, value);
}
@@ -222,6 +264,9 @@ final class FieldSet<FieldDescriptorType extends
*/
public void clearField(final FieldDescriptorType descriptor) {
fields.remove(descriptor);
+ if (fields.isEmpty()) {
+ hasLazyField = false;
+ }
}
/**
@@ -234,7 +279,7 @@ final class FieldSet<FieldDescriptorType extends
"getRepeatedField() can only be called on repeated fields.");
}
- final Object value = fields.get(descriptor);
+ final Object value = getField(descriptor);
if (value == null) {
return 0;
} else {
@@ -253,7 +298,7 @@ final class FieldSet<FieldDescriptorType extends
"getRepeatedField() can only be called on repeated fields.");
}
- final Object value = fields.get(descriptor);
+ final Object value = getField(descriptor);
if (value == null) {
throw new IndexOutOfBoundsException();
@@ -275,13 +320,13 @@ final class FieldSet<FieldDescriptorType extends
"getRepeatedField() can only be called on repeated fields.");
}
- final Object list = fields.get(descriptor);
+ final Object list = getField(descriptor);
if (list == null) {
throw new IndexOutOfBoundsException();
}
verifyType(descriptor.getLiteType(), value);
- ((List) list).set(index, value);
+ ((List<Object>) list).set(index, value);
}
/**
@@ -298,13 +343,13 @@ final class FieldSet<FieldDescriptorType extends
verifyType(descriptor.getLiteType(), value);
- final Object existingValue = fields.get(descriptor);
- List list;
+ final Object existingValue = getField(descriptor);
+ List<Object> list;
if (existingValue == null) {
- list = new ArrayList();
+ list = new ArrayList<Object>();
fields.put(descriptor, list);
} else {
- list = (List) existingValue;
+ list = (List<Object>) existingValue;
}
list.add(value);
@@ -338,7 +383,8 @@ final class FieldSet<FieldDescriptorType extends
break;
case MESSAGE:
// TODO(kenton): Caller must do type checking here, I guess.
- isValid = value instanceof MessageLite;
+ isValid =
+ (value instanceof MessageLite) || (value instanceof LazyField);
break;
}
@@ -392,8 +438,16 @@ final class FieldSet<FieldDescriptorType extends
}
}
} else {
- if (!((MessageLite) entry.getValue()).isInitialized()) {
- return false;
+ Object value = entry.getValue();
+ if (value instanceof MessageLite) {
+ if (!((MessageLite) value).isInitialized()) {
+ return false;
+ }
+ } else if (value instanceof LazyField) {
+ return true;
+ } else {
+ throw new IllegalArgumentException(
+ "Wrong object type used with protocol message reflection.");
}
}
}
@@ -416,7 +470,8 @@ final class FieldSet<FieldDescriptorType extends
}
/**
- * Like {@link #mergeFrom(Message)}, but merges from another {@link FieldSet}.
+ * Like {@link Message.Builder#mergeFrom(Message)}, but merges from another
+ * {@link FieldSet}.
*/
public void mergeFrom(final FieldSet<FieldDescriptorType> other) {
for (int i = 0; i < other.fields.getNumArrayEntries(); i++) {
@@ -428,14 +483,17 @@ final class FieldSet<FieldDescriptorType extends
}
}
- @SuppressWarnings("unchecked")
+ @SuppressWarnings({"unchecked", "rawtypes"})
private void mergeFromField(
final Map.Entry<FieldDescriptorType, Object> entry) {
final FieldDescriptorType descriptor = entry.getKey();
- final Object otherValue = entry.getValue();
+ Object otherValue = entry.getValue();
+ if (otherValue instanceof LazyField) {
+ otherValue = ((LazyField) otherValue).getValue();
+ }
if (descriptor.isRepeated()) {
- Object value = fields.get(descriptor);
+ Object value = getField(descriptor);
if (value == null) {
// Our list is empty, but we still need to make a defensive copy of
// the other list since we don't know if the other FieldSet is still
@@ -446,7 +504,7 @@ final class FieldSet<FieldDescriptorType extends
((List) value).addAll((List) otherValue);
}
} else if (descriptor.getLiteJavaType() == WireFormat.JavaType.MESSAGE) {
- Object value = fields.get(descriptor);
+ Object value = getField(descriptor);
if (value == null) {
fields.put(descriptor, otherValue);
} else {
@@ -457,7 +515,6 @@ final class FieldSet<FieldDescriptorType extends
((MessageLite) value).toBuilder(), (MessageLite) otherValue)
.build());
}
-
} else {
fields.put(descriptor, otherValue);
}
@@ -646,7 +703,11 @@ final class FieldSet<FieldDescriptorType extends
}
}
} else {
- writeElement(output, type, number, value);
+ if (value instanceof LazyField) {
+ writeElement(output, type, number, ((LazyField) value).getValue());
+ } else {
+ writeElement(output, type, number, value);
+ }
}
}
@@ -686,12 +747,18 @@ final class FieldSet<FieldDescriptorType extends
private int getMessageSetSerializedSize(
final Map.Entry<FieldDescriptorType, Object> entry) {
final FieldDescriptorType descriptor = entry.getKey();
- if (descriptor.getLiteJavaType() == WireFormat.JavaType.MESSAGE &&
- !descriptor.isRepeated() && !descriptor.isPacked()) {
- return CodedOutputStream.computeMessageSetExtensionSize(
- entry.getKey().getNumber(), (MessageLite) entry.getValue());
+ Object value = entry.getValue();
+ if (descriptor.getLiteJavaType() == WireFormat.JavaType.MESSAGE
+ && !descriptor.isRepeated() && !descriptor.isPacked()) {
+ if (value instanceof LazyField) {
+ return CodedOutputStream.computeLazyFieldMessageSetExtensionSize(
+ entry.getKey().getNumber(), (LazyField) value);
+ } else {
+ return CodedOutputStream.computeMessageSetExtensionSize(
+ entry.getKey().getNumber(), (MessageLite) value);
+ }
} else {
- return computeFieldSize(descriptor, entry.getValue());
+ return computeFieldSize(descriptor, value);
}
}
@@ -741,7 +808,6 @@ final class FieldSet<FieldDescriptorType extends
case BOOL : return CodedOutputStream.computeBoolSizeNoTag ((Boolean )value);
case STRING : return CodedOutputStream.computeStringSizeNoTag ((String )value);
case GROUP : return CodedOutputStream.computeGroupSizeNoTag ((MessageLite)value);
- case MESSAGE : return CodedOutputStream.computeMessageSizeNoTag ((MessageLite)value);
case BYTES : return CodedOutputStream.computeBytesSizeNoTag ((ByteString )value);
case UINT32 : return CodedOutputStream.computeUInt32SizeNoTag ((Integer )value);
case SFIXED32: return CodedOutputStream.computeSFixed32SizeNoTag((Integer )value);
@@ -749,6 +815,13 @@ final class FieldSet<FieldDescriptorType extends
case SINT32 : return CodedOutputStream.computeSInt32SizeNoTag ((Integer )value);
case SINT64 : return CodedOutputStream.computeSInt64SizeNoTag ((Long )value);
+ case MESSAGE:
+ if (value instanceof LazyField) {
+ return CodedOutputStream.computeLazyFieldSizeNoTag((LazyField) value);
+ } else {
+ return CodedOutputStream.computeMessageSizeNoTag((MessageLite) value);
+ }
+
case ENUM:
return CodedOutputStream.computeEnumSizeNoTag(
((Internal.EnumLite) value).getNumber());
diff --git a/java/src/main/java/com/google/protobuf/GeneratedMessage.java b/java/src/main/java/com/google/protobuf/GeneratedMessage.java
index b5eaded5..0c15ca84 100644
--- a/java/src/main/java/com/google/protobuf/GeneratedMessage.java
+++ b/java/src/main/java/com/google/protobuf/GeneratedMessage.java
@@ -58,8 +58,6 @@ public abstract class GeneratedMessage extends AbstractMessage
implements Serializable {
private static final long serialVersionUID = 1L;
- private final UnknownFieldSet unknownFields;
-
/**
* For testing. Allows a test to disable the optimization that avoids using
* field builders for nested messages until they are requested. By disabling
@@ -68,11 +66,14 @@ public abstract class GeneratedMessage extends AbstractMessage
protected static boolean alwaysUseFieldBuilders = false;
protected GeneratedMessage() {
- this.unknownFields = UnknownFieldSet.getDefaultInstance();
}
protected GeneratedMessage(Builder<?> builder) {
- this.unknownFields = builder.getUnknownFields();
+ }
+
+ public Parser<? extends Message> getParserForType() {
+ throw new UnsupportedOperationException(
+ "This is supposed to be overridden by subclasses.");
}
/**
@@ -175,8 +176,28 @@ public abstract class GeneratedMessage extends AbstractMessage
}
//@Override (Java 1.6 override semantics, but we must support 1.5)
- public final UnknownFieldSet getUnknownFields() {
- return unknownFields;
+ public UnknownFieldSet getUnknownFields() {
+ throw new UnsupportedOperationException(
+ "This is supposed to be overridden by subclasses.");
+ }
+
+ /**
+ * Called by subclasses to parse an unknown field.
+ * @return {@code true} unless the tag is an end-group tag.
+ */
+ protected boolean parseUnknownField(
+ CodedInputStream input,
+ UnknownFieldSet.Builder unknownFields,
+ ExtensionRegistryLite extensionRegistry,
+ int tag) throws IOException {
+ return unknownFields.mergeFieldFrom(tag, input);
+ }
+
+ /**
+ * Used by parsing constructors in generated classes.
+ */
+ protected void makeExtensionsImmutable() {
+ // Noop for messages without extensions.
}
protected abstract Message.Builder newBuilderForType(BuilderParent parent);
@@ -319,6 +340,11 @@ public abstract class GeneratedMessage extends AbstractMessage
}
//@Override (Java 1.6 override semantics, but we must support 1.5)
+ public Message.Builder getFieldBuilder(final FieldDescriptor field) {
+ return internalGetFieldAccessorTable().getField(field).getBuilder(this);
+ }
+
+ //@Override (Java 1.6 override semantics, but we must support 1.5)
public boolean hasField(final FieldDescriptor field) {
return internalGetFieldAccessorTable().getField(field).has(this);
}
@@ -626,6 +652,25 @@ public abstract class GeneratedMessage extends AbstractMessage
return super.isInitialized() && extensionsAreInitialized();
}
+ @Override
+ protected boolean parseUnknownField(
+ CodedInputStream input,
+ UnknownFieldSet.Builder unknownFields,
+ ExtensionRegistryLite extensionRegistry,
+ int tag) throws IOException {
+ return AbstractMessage.Builder.mergeFieldFrom(
+ input, unknownFields, extensionRegistry, getDescriptorForType(),
+ null, extensions, tag);
+ }
+
+ /**
+ * Used by parsing constructors in generated classes.
+ */
+ @Override
+ protected void makeExtensionsImmutable() {
+ extensions.makeImmutable();
+ }
+
/**
* Used by subclasses to serialize extensions. Extension ranges may be
* interleaved with field numbers, but we must write them in canonical
@@ -655,9 +700,21 @@ public abstract class GeneratedMessage extends AbstractMessage
if (messageSetWireFormat && descriptor.getLiteJavaType() ==
WireFormat.JavaType.MESSAGE &&
!descriptor.isRepeated()) {
- output.writeMessageSetExtension(descriptor.getNumber(),
- (Message) next.getValue());
+ if (next instanceof LazyField.LazyEntry<?>) {
+ output.writeRawMessageSetExtension(descriptor.getNumber(),
+ ((LazyField.LazyEntry<?>) next).getField().toByteString());
+ } else {
+ output.writeMessageSetExtension(descriptor.getNumber(),
+ (Message) next.getValue());
+ }
} else {
+ // TODO(xiangl): Take care with the following code; it may cause
+ // problems once LazyField is used for normal fields/extensions.
+ // An optional field can be duplicated at the end of the
+ // serialized bytes, which makes the serialized size change
+ // after the lazy field is parsed. So when LazyField is used globally,
+ // the write call below must be changed to write the cached
+ // bytes directly rather than the parsed message.
FieldSet.writeField(descriptor, next.getValue(), output);
}
if (iter.hasNext()) {
@@ -974,7 +1031,8 @@ public abstract class GeneratedMessage extends AbstractMessage
final ExtensionRegistryLite extensionRegistry,
final int tag) throws IOException {
return AbstractMessage.Builder.mergeFieldFrom(
- input, unknownFields, extensionRegistry, this, tag);
+ input, unknownFields, extensionRegistry, getDescriptorForType(),
+ this, null, tag);
}
// ---------------------------------------------------------------
@@ -1405,39 +1463,72 @@ public abstract class GeneratedMessage extends AbstractMessage
final String[] camelCaseNames,
final Class<? extends GeneratedMessage> messageClass,
final Class<? extends Builder> builderClass) {
+ this(descriptor, camelCaseNames);
+ ensureFieldAccessorsInitialized(messageClass, builderClass);
+ }
+
+ /**
+ * Construct a FieldAccessorTable for a particular message class without
+ * initializing FieldAccessors.
+ */
+ public FieldAccessorTable(
+ final Descriptor descriptor,
+ final String[] camelCaseNames) {
this.descriptor = descriptor;
+ this.camelCaseNames = camelCaseNames;
fields = new FieldAccessor[descriptor.getFields().size()];
+ initialized = false;
+ }
- for (int i = 0; i < fields.length; i++) {
- final FieldDescriptor field = descriptor.getFields().get(i);
- if (field.isRepeated()) {
- if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
- fields[i] = new RepeatedMessageFieldAccessor(
- field, camelCaseNames[i], messageClass, builderClass);
- } else if (field.getJavaType() == FieldDescriptor.JavaType.ENUM) {
- fields[i] = new RepeatedEnumFieldAccessor(
- field, camelCaseNames[i], messageClass, builderClass);
- } else {
- fields[i] = new RepeatedFieldAccessor(
- field, camelCaseNames[i], messageClass, builderClass);
- }
- } else {
- if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
- fields[i] = new SingularMessageFieldAccessor(
- field, camelCaseNames[i], messageClass, builderClass);
- } else if (field.getJavaType() == FieldDescriptor.JavaType.ENUM) {
- fields[i] = new SingularEnumFieldAccessor(
- field, camelCaseNames[i], messageClass, builderClass);
+ /**
+ * Ensures the field accessors are initialized. This method is thread-safe.
+ *
+ * @param messageClass The message type.
+ * @param builderClass The builder type.
+ * @return this
+ */
+ public FieldAccessorTable ensureFieldAccessorsInitialized(
+ Class<? extends GeneratedMessage> messageClass,
+ Class<? extends Builder> builderClass) {
+ if (initialized) { return this; }
+ synchronized (this) {
+ if (initialized) { return this; }
+ for (int i = 0; i < fields.length; i++) {
+ FieldDescriptor field = descriptor.getFields().get(i);
+ if (field.isRepeated()) {
+ if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
+ fields[i] = new RepeatedMessageFieldAccessor(
+ field, camelCaseNames[i], messageClass, builderClass);
+ } else if (field.getJavaType() == FieldDescriptor.JavaType.ENUM) {
+ fields[i] = new RepeatedEnumFieldAccessor(
+ field, camelCaseNames[i], messageClass, builderClass);
+ } else {
+ fields[i] = new RepeatedFieldAccessor(
+ field, camelCaseNames[i], messageClass, builderClass);
+ }
} else {
- fields[i] = new SingularFieldAccessor(
- field, camelCaseNames[i], messageClass, builderClass);
+ if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
+ fields[i] = new SingularMessageFieldAccessor(
+ field, camelCaseNames[i], messageClass, builderClass);
+ } else if (field.getJavaType() == FieldDescriptor.JavaType.ENUM) {
+ fields[i] = new SingularEnumFieldAccessor(
+ field, camelCaseNames[i], messageClass, builderClass);
+ } else {
+ fields[i] = new SingularFieldAccessor(
+ field, camelCaseNames[i], messageClass, builderClass);
+ }
}
}
+ initialized = true;
+ camelCaseNames = null;
+ return this;
}
}
private final Descriptor descriptor;
private final FieldAccessor[] fields;
+ private String[] camelCaseNames;
+ private volatile boolean initialized;
/** Get the FieldAccessor for a particular field. */
private FieldAccessor getField(final FieldDescriptor field) {
@@ -1472,6 +1563,7 @@ public abstract class GeneratedMessage extends AbstractMessage
int getRepeatedCount(GeneratedMessage.Builder builder);
void clear(Builder builder);
Message.Builder newBuilder();
+ Message.Builder getBuilder(GeneratedMessage.Builder builder);
}
// ---------------------------------------------------------------
@@ -1551,6 +1643,10 @@ public abstract class GeneratedMessage extends AbstractMessage
throw new UnsupportedOperationException(
"newBuilderForField() called on a non-Message type.");
}
+ public Message.Builder getBuilder(GeneratedMessage.Builder builder) {
+ throw new UnsupportedOperationException(
+ "getFieldBuilder() called on a non-Message type.");
+ }
}
private static class RepeatedFieldAccessor implements FieldAccessor {
@@ -1573,8 +1669,6 @@ public abstract class GeneratedMessage extends AbstractMessage
"get" + camelCaseName + "List");
getMethodBuilder = getMethodOrDie(builderClass,
"get" + camelCaseName + "List");
-
-
getRepeatedMethod =
getMethodOrDie(messageClass, "get" + camelCaseName, Integer.TYPE);
getRepeatedMethodBuilder =
@@ -1625,11 +1719,11 @@ public abstract class GeneratedMessage extends AbstractMessage
}
public boolean has(final GeneratedMessage message) {
throw new UnsupportedOperationException(
- "hasField() called on a singular field.");
+ "hasField() called on a repeated field.");
}
public boolean has(GeneratedMessage.Builder builder) {
throw new UnsupportedOperationException(
- "hasField() called on a singular field.");
+ "hasField() called on a repeated field.");
}
public int getRepeatedCount(final GeneratedMessage message) {
return (Integer) invokeOrDie(getCountMethod, message);
@@ -1644,6 +1738,10 @@ public abstract class GeneratedMessage extends AbstractMessage
throw new UnsupportedOperationException(
"newBuilderForField() called on a non-Message type.");
}
+ public Message.Builder getBuilder(GeneratedMessage.Builder builder) {
+ throw new UnsupportedOperationException(
+ "getFieldBuilder() called on a non-Message type.");
+ }
}
// ---------------------------------------------------------------
@@ -1753,9 +1851,12 @@ public abstract class GeneratedMessage extends AbstractMessage
super(descriptor, camelCaseName, messageClass, builderClass);
newBuilderMethod = getMethodOrDie(type, "newBuilder");
+ getBuilderMethodBuilder =
+ getMethodOrDie(builderClass, "get" + camelCaseName + "Builder");
}
private final Method newBuilderMethod;
+ private final Method getBuilderMethodBuilder;
private Object coerceType(final Object value) {
if (type.isInstance(value)) {
@@ -1766,7 +1867,7 @@ public abstract class GeneratedMessage extends AbstractMessage
// DynamicMessage -- we should accept it. In this case we can make
// a copy of the message.
return ((Message.Builder) invokeOrDie(newBuilderMethod, null))
- .mergeFrom((Message) value).build();
+ .mergeFrom((Message) value).buildPartial();
}
}
@@ -1778,6 +1879,10 @@ public abstract class GeneratedMessage extends AbstractMessage
public Message.Builder newBuilder() {
return (Message.Builder) invokeOrDie(newBuilderMethod, null);
}
+ @Override
+ public Message.Builder getBuilder(GeneratedMessage.Builder builder) {
+ return (Message.Builder) invokeOrDie(getBuilderMethodBuilder, builder);
+ }
}
private static final class RepeatedMessageFieldAccessor
@@ -1825,7 +1930,7 @@ public abstract class GeneratedMessage extends AbstractMessage
/**
* Replaces this object in the output stream with a serialized form.
* Part of Java's serialization magic. Generated sub-classes must override
- * this method by calling <code>return super.writeReplace();</code>
+ * this method by calling {@code return super.writeReplace();}
* @return a SerializedForm of this message
*/
protected Object writeReplace() throws ObjectStreamException {
diff --git a/java/src/main/java/com/google/protobuf/GeneratedMessageLite.java b/java/src/main/java/com/google/protobuf/GeneratedMessageLite.java
index 1813e9b3..437e3412 100644
--- a/java/src/main/java/com/google/protobuf/GeneratedMessageLite.java
+++ b/java/src/main/java/com/google/protobuf/GeneratedMessageLite.java
@@ -55,6 +55,29 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite
protected GeneratedMessageLite(Builder builder) {
}
+ public Parser<? extends MessageLite> getParserForType() {
+ throw new UnsupportedOperationException(
+ "This is supposed to be overridden by subclasses.");
+ }
+
+ /**
+ * Called by subclasses to parse an unknown field.
+ * @return {@code true} unless the tag is an end-group tag.
+ */
+ protected boolean parseUnknownField(
+ CodedInputStream input,
+ ExtensionRegistryLite extensionRegistry,
+ int tag) throws IOException {
+ return input.skipField(tag);
+ }
+
+ /**
+ * Used by parsing constructors in generated classes.
+ */
+ protected void makeExtensionsImmutable() {
+ // Noop for messages without extensions.
+ }
+
@SuppressWarnings("unchecked")
public abstract static class Builder<MessageType extends GeneratedMessageLite,
BuilderType extends Builder>
@@ -86,9 +109,9 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite
* @return {@code true} unless the tag is an end-group tag.
*/
protected boolean parseUnknownField(
- final CodedInputStream input,
- final ExtensionRegistryLite extensionRegistry,
- final int tag) throws IOException {
+ CodedInputStream input,
+ ExtensionRegistryLite extensionRegistry,
+ int tag) throws IOException {
return input.skipField(tag);
}
}
@@ -194,6 +217,31 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite
}
/**
+ * Called by subclasses to parse an unknown field or an extension.
+ * @return {@code true} unless the tag is an end-group tag.
+ */
+ @Override
+ protected boolean parseUnknownField(
+ CodedInputStream input,
+ ExtensionRegistryLite extensionRegistry,
+ int tag) throws IOException {
+ return GeneratedMessageLite.parseUnknownField(
+ extensions,
+ getDefaultInstanceForType(),
+ input,
+ extensionRegistry,
+ tag);
+ }
+
+ /**
+ * Used by parsing constructors in generated classes.
+ */
+ @Override
+ protected void makeExtensionsImmutable() {
+ extensions.makeImmutable();
+ }
+
+ /**
* Used by subclasses to serialize extensions. Extension ranges may be
* interleaved with field numbers, but we must write them in canonical
* (sorted by field number) order. ExtensionWriter helps us write
@@ -400,121 +448,139 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite
*/
@Override
protected boolean parseUnknownField(
- final CodedInputStream input,
- final ExtensionRegistryLite extensionRegistry,
- final int tag) throws IOException {
- final int wireType = WireFormat.getTagWireType(tag);
- final int fieldNumber = WireFormat.getTagFieldNumber(tag);
-
- final GeneratedExtension<MessageType, ?> extension =
- extensionRegistry.findLiteExtensionByNumber(
- getDefaultInstanceForType(), fieldNumber);
-
- boolean unknown = false;
- boolean packed = false;
- if (extension == null) {
- unknown = true; // Unknown field.
- } else if (wireType == FieldSet.getWireFormatForFieldType(
- extension.descriptor.getLiteType(),
- false /* isPacked */)) {
- packed = false; // Normal, unpacked value.
- } else if (extension.descriptor.isRepeated &&
- extension.descriptor.type.isPackable() &&
- wireType == FieldSet.getWireFormatForFieldType(
- extension.descriptor.getLiteType(),
- true /* isPacked */)) {
- packed = true; // Packed value.
- } else {
- unknown = true; // Wrong wire type.
- }
+ CodedInputStream input,
+ ExtensionRegistryLite extensionRegistry,
+ int tag) throws IOException {
+ ensureExtensionsIsMutable();
+ return GeneratedMessageLite.parseUnknownField(
+ extensions,
+ getDefaultInstanceForType(),
+ input,
+ extensionRegistry,
+ tag);
+ }
- if (unknown) { // Unknown field or wrong wire type. Skip.
- return input.skipField(tag);
- }
+ protected final void mergeExtensionFields(final MessageType other) {
+ ensureExtensionsIsMutable();
+ extensions.mergeFrom(((ExtendableMessage) other).extensions);
+ }
+ }
- if (packed) {
- final int length = input.readRawVarint32();
- final int limit = input.pushLimit(length);
- if (extension.descriptor.getLiteType() == WireFormat.FieldType.ENUM) {
- while (input.getBytesUntilLimit() > 0) {
- final int rawValue = input.readEnum();
- final Object value =
- extension.descriptor.getEnumType().findValueByNumber(rawValue);
- if (value == null) {
- // If the number isn't recognized as a valid value for this
- // enum, drop it (don't even add it to unknownFields).
- return true;
- }
- ensureExtensionsIsMutable();
- extensions.addRepeatedField(extension.descriptor, value);
- }
- } else {
- while (input.getBytesUntilLimit() > 0) {
- final Object value =
- FieldSet.readPrimitiveField(input,
- extension.descriptor.getLiteType());
- ensureExtensionsIsMutable();
- extensions.addRepeatedField(extension.descriptor, value);
+ // -----------------------------------------------------------------
+
+ /**
+ * Parse an unknown field or an extension.
+ * @return {@code true} unless the tag is an end-group tag.
+ */
+ private static <MessageType extends MessageLite>
+ boolean parseUnknownField(
+ FieldSet<ExtensionDescriptor> extensions,
+ MessageType defaultInstance,
+ CodedInputStream input,
+ ExtensionRegistryLite extensionRegistry,
+ int tag) throws IOException {
+ int wireType = WireFormat.getTagWireType(tag);
+ int fieldNumber = WireFormat.getTagFieldNumber(tag);
+
+ GeneratedExtension<MessageType, ?> extension =
+ extensionRegistry.findLiteExtensionByNumber(
+ defaultInstance, fieldNumber);
+
+ boolean unknown = false;
+ boolean packed = false;
+ if (extension == null) {
+ unknown = true; // Unknown field.
+ } else if (wireType == FieldSet.getWireFormatForFieldType(
+ extension.descriptor.getLiteType(),
+ false /* isPacked */)) {
+ packed = false; // Normal, unpacked value.
+ } else if (extension.descriptor.isRepeated &&
+ extension.descriptor.type.isPackable() &&
+ wireType == FieldSet.getWireFormatForFieldType(
+ extension.descriptor.getLiteType(),
+ true /* isPacked */)) {
+ packed = true; // Packed value.
+ } else {
+ unknown = true; // Wrong wire type.
+ }
+
+ if (unknown) { // Unknown field or wrong wire type. Skip.
+ return input.skipField(tag);
+ }
+
+ if (packed) {
+ int length = input.readRawVarint32();
+ int limit = input.pushLimit(length);
+ if (extension.descriptor.getLiteType() == WireFormat.FieldType.ENUM) {
+ while (input.getBytesUntilLimit() > 0) {
+ int rawValue = input.readEnum();
+ Object value =
+ extension.descriptor.getEnumType().findValueByNumber(rawValue);
+ if (value == null) {
+ // If the number isn't recognized as a valid value for this
+ // enum, drop it (don't even add it to unknownFields).
+ return true;
}
+ extensions.addRepeatedField(extension.descriptor, value);
}
- input.popLimit(limit);
} else {
- final Object value;
- switch (extension.descriptor.getLiteJavaType()) {
- case MESSAGE: {
- MessageLite.Builder subBuilder = null;
- if (!extension.descriptor.isRepeated()) {
- MessageLite existingValue =
- (MessageLite) extensions.getField(extension.descriptor);
- if (existingValue != null) {
- subBuilder = existingValue.toBuilder();
- }
- }
- if (subBuilder == null) {
- subBuilder = extension.messageDefaultInstance.newBuilderForType();
- }
- if (extension.descriptor.getLiteType() ==
- WireFormat.FieldType.GROUP) {
- input.readGroup(extension.getNumber(),
- subBuilder, extensionRegistry);
- } else {
- input.readMessage(subBuilder, extensionRegistry);
+ while (input.getBytesUntilLimit() > 0) {
+ Object value =
+ FieldSet.readPrimitiveField(input,
+ extension.descriptor.getLiteType());
+ extensions.addRepeatedField(extension.descriptor, value);
+ }
+ }
+ input.popLimit(limit);
+ } else {
+ Object value;
+ switch (extension.descriptor.getLiteJavaType()) {
+ case MESSAGE: {
+ MessageLite.Builder subBuilder = null;
+ if (!extension.descriptor.isRepeated()) {
+ MessageLite existingValue =
+ (MessageLite) extensions.getField(extension.descriptor);
+ if (existingValue != null) {
+ subBuilder = existingValue.toBuilder();
}
- value = subBuilder.build();
- break;
}
- case ENUM:
- final int rawValue = input.readEnum();
- value = extension.descriptor.getEnumType()
- .findValueByNumber(rawValue);
- // If the number isn't recognized as a valid value for this enum,
- // drop it.
- if (value == null) {
- return true;
- }
- break;
- default:
- value = FieldSet.readPrimitiveField(input,
- extension.descriptor.getLiteType());
- break;
- }
-
- if (extension.descriptor.isRepeated()) {
- ensureExtensionsIsMutable();
- extensions.addRepeatedField(extension.descriptor, value);
- } else {
- ensureExtensionsIsMutable();
- extensions.setField(extension.descriptor, value);
+ if (subBuilder == null) {
+ subBuilder = extension.messageDefaultInstance.newBuilderForType();
+ }
+ if (extension.descriptor.getLiteType() ==
+ WireFormat.FieldType.GROUP) {
+ input.readGroup(extension.getNumber(),
+ subBuilder, extensionRegistry);
+ } else {
+ input.readMessage(subBuilder, extensionRegistry);
+ }
+ value = subBuilder.build();
+ break;
}
+ case ENUM:
+ int rawValue = input.readEnum();
+ value = extension.descriptor.getEnumType()
+ .findValueByNumber(rawValue);
+ // If the number isn't recognized as a valid value for this enum,
+ // drop it.
+ if (value == null) {
+ return true;
+ }
+ break;
+ default:
+ value = FieldSet.readPrimitiveField(input,
+ extension.descriptor.getLiteType());
+ break;
}
- return true;
+ if (extension.descriptor.isRepeated()) {
+ extensions.addRepeatedField(extension.descriptor, value);
+ } else {
+ extensions.setField(extension.descriptor, value);
+ }
}
- protected final void mergeExtensionFields(final MessageType other) {
- ensureExtensionsIsMutable();
- extensions.mergeFrom(((ExtendableMessage) other).extensions);
- }
+ return true;
}
// -----------------------------------------------------------------
@@ -722,7 +788,7 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite
/**
* Replaces this object in the output stream with a serialized form.
* Part of Java's serialization magic. Generated sub-classes must override
- * this method by calling <code>return super.writeReplace();</code>
+ * this method by calling {@code return super.writeReplace();}
* @return a SerializedForm of this message
*/
protected Object writeReplace() throws ObjectStreamException {
diff --git a/java/src/main/java/com/google/protobuf/Internal.java b/java/src/main/java/com/google/protobuf/Internal.java
index 05eab57a..81af2583 100644
--- a/java/src/main/java/com/google/protobuf/Internal.java
+++ b/java/src/main/java/com/google/protobuf/Internal.java
@@ -103,85 +103,32 @@ public class Internal {
* Helper called by generated code to determine if a byte array is a valid
* UTF-8 encoded string such that the original bytes can be converted to
* a String object and then back to a byte array round tripping the bytes
- * without loss.
- * <p>
- * This is inspired by UTF_8.java in sun.nio.cs.
+ * without loss. More precisely, returns {@code true} whenever:
+ * <pre> {@code
+ * Arrays.equals(byteString.toByteArray(),
+ * new String(byteString.toByteArray(), "UTF-8").getBytes("UTF-8"))
+ * }</pre>
+ *
+ * <p>This method rejects "overlong" byte sequences, as well as
+ * 3-byte sequences that would map to a surrogate character, in
+ * accordance with the restricted definition of UTF-8 introduced in
+ * Unicode 3.1. Note that the UTF-8 decoder included in Oracle's
+ * JDK has been modified to also reject "overlong" byte sequences,
+ * but currently (2011) still accepts 3-byte surrogate character
+ * byte sequences.
+ *
+ * <p>See the Unicode Standard,<br>
+ * Table 3-6. <em>UTF-8 Bit Distribution</em>,<br>
+ * Table 3-7. <em>Well Formed UTF-8 Byte Sequences</em>.
+ *
+ * <p>As of 2011-02, this method simply returns the result of {@link
+ * ByteString#isValidUtf8()}. Calling that method directly is preferred.
*
* @param byteString the string to check
* @return whether the byte array is round trippable
*/
public static boolean isValidUtf8(ByteString byteString) {
- int index = 0;
- int size = byteString.size();
- // To avoid the masking, we could change this to use bytes;
- // Then X > 0xC2 gets turned into X < -0xC2; X < 0x80
- // gets turned into X >= 0, etc.
-
- while (index < size) {
- int byte1 = byteString.byteAt(index++) & 0xFF;
- if (byte1 < 0x80) {
- // fast loop for single bytes
- continue;
-
- // we know from this point on that we have 2-4 byte forms
- } else if (byte1 < 0xC2 || byte1 > 0xF4) {
- // catch illegal first bytes: < C2 or > F4
- return false;
- }
- if (index >= size) {
- // fail if we run out of bytes
- return false;
- }
- int byte2 = byteString.byteAt(index++) & 0xFF;
- if (byte2 < 0x80 || byte2 > 0xBF) {
- // general trail-byte test
- return false;
- }
- if (byte1 <= 0xDF) {
- // two-byte form; general trail-byte test is sufficient
- continue;
- }
-
- // we know from this point on that we have 3 or 4 byte forms
- if (index >= size) {
- // fail if we run out of bytes
- return false;
- }
- int byte3 = byteString.byteAt(index++) & 0xFF;
- if (byte3 < 0x80 || byte3 > 0xBF) {
- // general trail-byte test
- return false;
- }
- if (byte1 <= 0xEF) {
- // three-byte form. Vastly more frequent than four-byte forms
- // The following has an extra test, but not worth restructuring
- if (byte1 == 0xE0 && byte2 < 0xA0 ||
- byte1 == 0xED && byte2 > 0x9F) {
- // check special cases of byte2
- return false;
- }
-
- } else {
- // four-byte form
-
- if (index >= size) {
- // fail if we run out of bytes
- return false;
- }
- int byte4 = byteString.byteAt(index++) & 0xFF;
- if (byte4 < 0x80 || byte4 > 0xBF) {
- // general trail-byte test
- return false;
- }
- // The following has an extra test, but not worth restructuring
- if (byte1 == 0xF0 && byte2 < 0x90 ||
- byte1 == 0xF4 && byte2 > 0x8F) {
- // check special cases of byte2
- return false;
- }
- }
- }
- return true;
+ return byteString.isValidUtf8();
}
/**
diff --git a/java/src/main/java/com/google/protobuf/InvalidProtocolBufferException.java b/java/src/main/java/com/google/protobuf/InvalidProtocolBufferException.java
index 90f7ffbc..72d7ff7d 100644
--- a/java/src/main/java/com/google/protobuf/InvalidProtocolBufferException.java
+++ b/java/src/main/java/com/google/protobuf/InvalidProtocolBufferException.java
@@ -40,11 +40,32 @@ import java.io.IOException;
*/
public class InvalidProtocolBufferException extends IOException {
private static final long serialVersionUID = -1616151763072450476L;
+ private MessageLite unfinishedMessage = null;
public InvalidProtocolBufferException(final String description) {
super(description);
}
+ /**
+ * Attaches an unfinished message to the exception to support best-effort
+ * parsing in {@code Parser} interface.
+ *
+ * @return this
+ */
+ public InvalidProtocolBufferException setUnfinishedMessage(
+ MessageLite unfinishedMessage) {
+ this.unfinishedMessage = unfinishedMessage;
+ return this;
+ }
+
+ /**
+ * Returns the unfinished message attached to the exception, or null if
+ * no message is attached.
+ */
+ public MessageLite getUnfinishedMessage() {
+ return unfinishedMessage;
+ }
+
static InvalidProtocolBufferException truncatedMessage() {
return new InvalidProtocolBufferException(
"While parsing a protocol message, the input ended unexpectedly " +
diff --git a/java/src/main/java/com/google/protobuf/LazyField.java b/java/src/main/java/com/google/protobuf/LazyField.java
new file mode 100644
index 00000000..df9425eb
--- /dev/null
+++ b/java/src/main/java/com/google/protobuf/LazyField.java
@@ -0,0 +1,216 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Map.Entry;
+
+/**
+ * LazyField encapsulates the logic of lazily parsing message fields. It stores
+ * the message in a ByteString initially and then parses it on demand.
+ *
+ * LazyField is thread-compatible: concurrent reads are safe, but
+ * synchronization is needed under read/write situations.
+ *
+ * Now LazyField is only used to lazily load MessageSet.
+ * TODO(xiangl): Use LazyField to lazily load all messages.
+ *
+ * @author xiangl@google.com (Xiang Li)
+ */
+class LazyField {
+
+ final private MessageLite defaultInstance;
+ final private ExtensionRegistryLite extensionRegistry;
+
+ // Mutable because it is initialized lazily.
+ private ByteString bytes;
+ private volatile MessageLite value;
+ private volatile boolean isDirty = false;
+
+ public LazyField(MessageLite defaultInstance,
+ ExtensionRegistryLite extensionRegistry, ByteString bytes) {
+ this.defaultInstance = defaultInstance;
+ this.extensionRegistry = extensionRegistry;
+ this.bytes = bytes;
+ }
+
+ public MessageLite getValue() {
+ ensureInitialized();
+ return value;
+ }
+
+ /**
+ * LazyField is not thread-safe for write access. Synchronization is needed
+ * under read/write situations.
+ */
+ public MessageLite setValue(MessageLite value) {
+ MessageLite originalValue = this.value;
+ this.value = value;
+ bytes = null;
+ isDirty = true;
+ return originalValue;
+ }
+
+ /**
+ * Because an optional field can be duplicated at the end of the serialized
+ * bytes, the serialized size may change after the LazyField is parsed. Be
+ * careful when using this method.
+ */
+ public int getSerializedSize() {
+ if (isDirty) {
+ return value.getSerializedSize();
+ }
+ return bytes.size();
+ }
+
+ public ByteString toByteString() {
+ if (!isDirty) {
+ return bytes;
+ }
+ synchronized (this) {
+ if (!isDirty) {
+ return bytes;
+ }
+ bytes = value.toByteString();
+ isDirty = false;
+ return bytes;
+ }
+ }
+
+ @Override
+ public int hashCode() {
+ ensureInitialized();
+ return value.hashCode();
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ ensureInitialized();
+ return value.equals(obj);
+ }
+
+ @Override
+ public String toString() {
+ ensureInitialized();
+ return value.toString();
+ }
+
+ private void ensureInitialized() {
+ if (value != null) {
+ return;
+ }
+ synchronized (this) {
+ if (value != null) {
+ return;
+ }
+ try {
+ if (bytes != null) {
+ value = defaultInstance.getParserForType()
+ .parseFrom(bytes, extensionRegistry);
+ }
+ } catch (IOException e) {
+ // TODO(xiangl): Refactor the API to support exceptions thrown while
+ // lazily loading messages.
+ }
+ }
+ }
+
+ // ====================================================
+
+ /**
+ * LazyEntry and LazyIterator are used to encapsulate the LazyField when
+ * users iterate over all fields of a FieldSet.
+ */
+ static class LazyEntry<K> implements Entry<K, Object> {
+ private Entry<K, LazyField> entry;
+
+ private LazyEntry(Entry<K, LazyField> entry) {
+ this.entry = entry;
+ }
+
+ @Override
+ public K getKey() {
+ return entry.getKey();
+ }
+
+ @Override
+ public Object getValue() {
+ LazyField field = entry.getValue();
+ if (field == null) {
+ return null;
+ }
+ return field.getValue();
+ }
+
+ public LazyField getField() {
+ return entry.getValue();
+ }
+
+ @Override
+ public Object setValue(Object value) {
+ if (!(value instanceof MessageLite)) {
+ throw new IllegalArgumentException(
+ "LazyField now only used for MessageSet, "
+ + "and the value of MessageSet must be an instance of MessageLite");
+ }
+ return entry.getValue().setValue((MessageLite) value);
+ }
+ }
+
+ static class LazyIterator<K> implements Iterator<Entry<K, Object>> {
+ private Iterator<Entry<K, Object>> iterator;
+
+ public LazyIterator(Iterator<Entry<K, Object>> iterator) {
+ this.iterator = iterator;
+ }
+
+ @Override
+ public boolean hasNext() {
+ return iterator.hasNext();
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public Entry<K, Object> next() {
+ Entry<K, ?> entry = iterator.next();
+ if (entry.getValue() instanceof LazyField) {
+ return new LazyEntry<K>((Entry<K, LazyField>) entry);
+ }
+ return (Entry<K, Object>) entry;
+ }
+
+ @Override
+ public void remove() {
+ iterator.remove();
+ }
+ }
+}
diff --git a/java/src/main/java/com/google/protobuf/LazyStringArrayList.java b/java/src/main/java/com/google/protobuf/LazyStringArrayList.java
index 1683a640..75c6a4b7 100644
--- a/java/src/main/java/com/google/protobuf/LazyStringArrayList.java
+++ b/java/src/main/java/com/google/protobuf/LazyStringArrayList.java
@@ -33,8 +33,9 @@ package com.google.protobuf;
import java.util.List;
import java.util.AbstractList;
import java.util.ArrayList;
-import java.util.RandomAccess;
import java.util.Collection;
+import java.util.Collections;
+import java.util.RandomAccess;
/**
* An implementation of {@link LazyStringList} that wraps an ArrayList. Each
@@ -72,6 +73,11 @@ public class LazyStringArrayList extends AbstractList<String>
list = new ArrayList<Object>();
}
+ public LazyStringArrayList(LazyStringList from) {
+ list = new ArrayList<Object>(from.size());
+ addAll(from);
+ }
+
public LazyStringArrayList(List<String> from) {
list = new ArrayList<Object>(from);
}
@@ -84,7 +90,7 @@ public class LazyStringArrayList extends AbstractList<String>
} else {
ByteString bs = (ByteString) o;
String s = bs.toStringUtf8();
- if (Internal.isValidUtf8(bs)) {
+ if (bs.isValidUtf8()) {
list.set(index, s);
}
return s;
@@ -109,8 +115,21 @@ public class LazyStringArrayList extends AbstractList<String>
}
@Override
+ public boolean addAll(Collection<? extends String> c) {
+ // The default implementation of AbstractCollection.addAll(Collection)
+ // delegates to add(Object). This implementation instead delegates to
+ // addAll(int, Collection), which makes a special case for Collections
+ // which are instances of LazyStringList.
+ return addAll(size(), c);
+ }
+
+ @Override
public boolean addAll(int index, Collection<? extends String> c) {
- boolean ret = list.addAll(index, c);
+ // When copying from another LazyStringList, directly copy the underlying
+ // elements rather than forcing each element to be decoded to a String.
+ Collection<?> collection = c instanceof LazyStringList
+ ? ((LazyStringList) c).getUnderlyingElements() : c;
+ boolean ret = list.addAll(index, collection);
modCount++;
return ret;
}
@@ -152,4 +171,9 @@ public class LazyStringArrayList extends AbstractList<String>
return ((ByteString) o).toStringUtf8();
}
}
+
+ @Override
+ public List<?> getUnderlyingElements() {
+ return Collections.unmodifiableList(list);
+ }
}
diff --git a/java/src/main/java/com/google/protobuf/LazyStringList.java b/java/src/main/java/com/google/protobuf/LazyStringList.java
index 97139ca6..630932fe 100644
--- a/java/src/main/java/com/google/protobuf/LazyStringList.java
+++ b/java/src/main/java/com/google/protobuf/LazyStringList.java
@@ -33,7 +33,7 @@ package com.google.protobuf;
import java.util.List;
/**
- * An interface extending List&lt;String&gt; that also provides access to the
+ * An interface extending {@code List<String>} that also provides access to the
* items of the list as UTF8-encoded ByteString objects. This is used by the
* protocol buffer implementation to support lazily converting bytes parsed
* over the wire to String objects until needed and also increases the
@@ -41,9 +41,9 @@ import java.util.List;
* ByteString is already cached.
* <p>
* This only adds additional methods that are required for the use in the
- * protocol buffer code in order to be able successfuly round trip byte arrays
+ * protocol buffer code in order to be able to successfully round trip byte arrays
* through parsing and serialization without conversion to strings. It's not
- * attempting to support the functionality of say List&ltByteString&gt, hence
+ * attempting to support the functionality of say {@code List<ByteString>}, hence
* why only these two very specific methods are added.
*
* @author jonp@google.com (Jon Perlow)
@@ -56,7 +56,7 @@ public interface LazyStringList extends List<String> {
* @param index index of the element to return
* @return the element at the specified position in this list
* @throws IndexOutOfBoundsException if the index is out of range
- * (<tt>index &lt; 0 || index &gt;= size()</tt>)
+ * ({@code index < 0 || index >= size()})
*/
ByteString getByteString(int index);
@@ -69,4 +69,13 @@ public interface LazyStringList extends List<String> {
* is not supported by this list
*/
void add(ByteString element);
+
+ /**
+ * Returns an unmodifiable List of the underlying elements, each of
+ * which is either a {@code String} or its equivalent UTF-8 encoded
+ * {@code ByteString}. It is an error for the caller to modify the returned
+ * List, and attempting to do so will result in an
+ * {@link UnsupportedOperationException}.
+ */
+ List<?> getUnderlyingElements();
}
diff --git a/java/src/main/java/com/google/protobuf/LiteralByteString.java b/java/src/main/java/com/google/protobuf/LiteralByteString.java
new file mode 100644
index 00000000..93c53dce
--- /dev/null
+++ b/java/src/main/java/com/google/protobuf/LiteralByteString.java
@@ -0,0 +1,349 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.NoSuchElementException;
+
+/**
+ * This class implements a {@link com.google.protobuf.ByteString} backed by a
+ * single array of bytes, contiguous in memory. It supports substring by
+ * pointing to only a sub-range of the underlying byte array, meaning that a
+ * substring will reference the full byte-array of the string it's made from,
+ * exactly as with {@link String}.
+ *
+ * @author carlanton@google.com (Carl Haverl)
+ */
+class LiteralByteString extends ByteString {
+
+ protected final byte[] bytes;
+
+ /**
+ * Creates a {@code LiteralByteString} backed by the given array, without
+ * copying.
+ *
+ * @param bytes array to wrap
+ */
+ LiteralByteString(byte[] bytes) {
+ this.bytes = bytes;
+ }
+
+ @Override
+ public byte byteAt(int index) {
+ // Unlike most methods in this class, this one is a direct implementation
+ // ignoring the potential offset because we need to do range-checking in the
+ // substring case anyway.
+ return bytes[index];
+ }
+
+ @Override
+ public int size() {
+ return bytes.length;
+ }
+
+ // =================================================================
+ // ByteString -> substring
+
+ @Override
+ public ByteString substring(int beginIndex, int endIndex) {
+ if (beginIndex < 0) {
+ throw new IndexOutOfBoundsException(
+ "Beginning index: " + beginIndex + " < 0");
+ }
+ if (endIndex > size()) {
+ throw new IndexOutOfBoundsException("End index: " + endIndex + " > " +
+ size());
+ }
+ int substringLength = endIndex - beginIndex;
+ if (substringLength < 0) {
+ throw new IndexOutOfBoundsException(
+ "Beginning index larger than ending index: " + beginIndex + ", "
+ + endIndex);
+ }
+
+ ByteString result;
+ if (substringLength == 0) {
+ result = ByteString.EMPTY;
+ } else {
+ result = new BoundedByteString(bytes, getOffsetIntoBytes() + beginIndex,
+ substringLength);
+ }
+ return result;
+ }
+
+ // =================================================================
+ // ByteString -> byte[]
+
+ @Override
+ protected void copyToInternal(byte[] target, int sourceOffset,
+ int targetOffset, int numberToCopy) {
+ // Optimized form, not for subclasses, since we don't call
+ // getOffsetIntoBytes() or check the 'numberToCopy' parameter.
+ System.arraycopy(bytes, sourceOffset, target, targetOffset, numberToCopy);
+ }
+
+ @Override
+ public void copyTo(ByteBuffer target) {
+ target.put(bytes, getOffsetIntoBytes(), size()); // Copies bytes
+ }
+
+ @Override
+ public ByteBuffer asReadOnlyByteBuffer() {
+ ByteBuffer byteBuffer =
+ ByteBuffer.wrap(bytes, getOffsetIntoBytes(), size());
+ return byteBuffer.asReadOnlyBuffer();
+ }
+
+ @Override
+ public List<ByteBuffer> asReadOnlyByteBufferList() {
+ // Return the ByteBuffer generated by asReadOnlyByteBuffer() as a singleton
+ List<ByteBuffer> result = new ArrayList<ByteBuffer>(1);
+ result.add(asReadOnlyByteBuffer());
+ return result;
+ }
+
+ @Override
+ public void writeTo(OutputStream outputStream) throws IOException {
+ outputStream.write(toByteArray());
+ }
+
+ @Override
+ public String toString(String charsetName)
+ throws UnsupportedEncodingException {
+ return new String(bytes, getOffsetIntoBytes(), size(), charsetName);
+ }
+
+ // =================================================================
+ // UTF-8 decoding
+
+ @Override
+ public boolean isValidUtf8() {
+ int offset = getOffsetIntoBytes();
+ return Utf8.isValidUtf8(bytes, offset, offset + size());
+ }
+
+ @Override
+ protected int partialIsValidUtf8(int state, int offset, int length) {
+ int index = getOffsetIntoBytes() + offset;
+ return Utf8.partialIsValidUtf8(state, bytes, index, index + length);
+ }
+
+ // =================================================================
+ // equals() and hashCode()
+
+ @Override
+ public boolean equals(Object other) {
+ if (other == this) {
+ return true;
+ }
+ if (!(other instanceof ByteString)) {
+ return false;
+ }
+
+ if (size() != ((ByteString) other).size()) {
+ return false;
+ }
+ if (size() == 0) {
+ return true;
+ }
+
+ if (other instanceof LiteralByteString) {
+ return equalsRange((LiteralByteString) other, 0, size());
+ } else if (other instanceof RopeByteString) {
+ return other.equals(this);
+ } else {
+ throw new IllegalArgumentException(
+ "Has a new type of ByteString been created? Found "
+ + other.getClass());
+ }
+ }
+
+ /**
+ * Check equality of the substring of given length of this object starting at
+ * zero with another {@code LiteralByteString} substring starting at offset.
+ *
+ * @param other what to compare a substring in
+ * @param offset offset into other
+ * @param length number of bytes to compare
+ * @return true for equality of substrings, else false.
+ */
+ boolean equalsRange(LiteralByteString other, int offset, int length) {
+ if (length > other.size()) {
+ throw new IllegalArgumentException(
+ "Length too large: " + length + size());
+ }
+ if (offset + length > other.size()) {
+ throw new IllegalArgumentException(
+ "Ran off end of other: " + offset + ", " + length + ", " +
+ other.size());
+ }
+
+ byte[] thisBytes = bytes;
+ byte[] otherBytes = other.bytes;
+ int thisLimit = getOffsetIntoBytes() + length;
+ for (int thisIndex = getOffsetIntoBytes(), otherIndex =
+ other.getOffsetIntoBytes() + offset;
+ (thisIndex < thisLimit); ++thisIndex, ++otherIndex) {
+ if (thisBytes[thisIndex] != otherBytes[otherIndex]) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ /**
+ * Cached hash value. Intentionally accessed via a data race, which
+ * is safe because of the Java Memory Model's "no out-of-thin-air values"
+ * guarantees for ints.
+ */
+ private int hash = 0;
+
+ /**
+ * Compute the hashCode using the traditional algorithm from {@link
+ * ByteString}.
+ *
+ * @return hashCode value
+ */
+ @Override
+ public int hashCode() {
+ int h = hash;
+
+ if (h == 0) {
+ int size = size();
+ h = partialHash(size, 0, size);
+ if (h == 0) {
+ h = 1;
+ }
+ hash = h;
+ }
+ return h;
+ }
+
+ @Override
+ protected int peekCachedHashCode() {
+ return hash;
+ }
+
+ @Override
+ protected int partialHash(int h, int offset, int length) {
+ byte[] thisBytes = bytes;
+ for (int i = getOffsetIntoBytes() + offset, limit = i + length; i < limit;
+ i++) {
+ h = h * 31 + thisBytes[i];
+ }
+ return h;
+ }
+
+ // =================================================================
+ // Input stream
+
+ @Override
+ public InputStream newInput() {
+ return new ByteArrayInputStream(bytes, getOffsetIntoBytes(),
+ size()); // No copy
+ }
+
+ @Override
+ public CodedInputStream newCodedInput() {
+ // We trust CodedInputStream not to modify the bytes, or to give anyone
+ // else access to them.
+ return CodedInputStream
+ .newInstance(bytes, getOffsetIntoBytes(), size()); // No copy
+ }
+
+ // =================================================================
+ // ByteIterator
+
+ @Override
+ public ByteIterator iterator() {
+ return new LiteralByteIterator();
+ }
+
+ private class LiteralByteIterator implements ByteIterator {
+ private int position;
+ private final int limit;
+
+ private LiteralByteIterator() {
+ position = 0;
+ limit = size();
+ }
+
+ public boolean hasNext() {
+ return (position < limit);
+ }
+
+ public Byte next() {
+ // Boxing calls Byte.valueOf(byte), which does not instantiate.
+ return nextByte();
+ }
+
+ public byte nextByte() {
+ try {
+ return bytes[position++];
+ } catch (ArrayIndexOutOfBoundsException e) {
+ throw new NoSuchElementException(e.getMessage());
+ }
+ }
+
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+ // =================================================================
+ // Internal methods
+
+ @Override
+ protected int getTreeDepth() {
+ return 0;
+ }
+
+ @Override
+ protected boolean isBalanced() {
+ return true;
+ }
+
+ /**
+ * Offset into {@code bytes[]} to use, non-zero for substrings.
+ *
+ * @return always 0 for this class
+ */
+ protected int getOffsetIntoBytes() {
+ return 0;
+ }
+}
diff --git a/java/src/main/java/com/google/protobuf/Message.java b/java/src/main/java/com/google/protobuf/Message.java
index 67c4148e..2b881413 100644
--- a/java/src/main/java/com/google/protobuf/Message.java
+++ b/java/src/main/java/com/google/protobuf/Message.java
@@ -50,25 +50,28 @@ import java.util.Map;
*/
public interface Message extends MessageLite, MessageOrBuilder {
+ // (From MessageLite, re-declared here only for return type covariance.)
+ Parser<? extends Message> getParserForType();
+
// -----------------------------------------------------------------
// Comparison and hashing
/**
* Compares the specified object with this message for equality. Returns
- * <tt>true</tt> if the given object is a message of the same type (as
+ * {@code true} if the given object is a message of the same type (as
* defined by {@code getDescriptorForType()}) and has identical values for
* all of its fields. Subclasses must implement this; inheriting
* {@code Object.equals()} is incorrect.
*
* @param other object to be compared for equality with this message
- * @return <tt>true</tt> if the specified object is equal to this message
+ * @return {@code true} if the specified object is equal to this message
*/
@Override
boolean equals(Object other);
/**
* Returns the hash code value for this message. The hash code of a message
- * should mix the message's type (object identity of the decsriptor) with its
+ * should mix the message's type (object identity of the descriptor) with its
* contents (known and unknown field values). Subclasses must implement this;
* inheriting {@code Object.hashCode()} is incorrect.
*
@@ -83,7 +86,8 @@ public interface Message extends MessageLite, MessageOrBuilder {
/**
* Converts the message to a string in protocol buffer text format. This is
- * just a trivial wrapper around {@link TextFormat#printToString(Message)}.
+ * just a trivial wrapper around {@link
+ * TextFormat#printToString(MessageOrBuilder)}.
*/
@Override
String toString();
@@ -145,6 +149,24 @@ public interface Message extends MessageLite, MessageOrBuilder {
Builder newBuilderForField(Descriptors.FieldDescriptor field);
/**
+ * Get a nested builder instance for the given field.
+ * <p>
+ * Normally, we hold a reference to the immutable message object for the
+ * message type field. Some implementations (the generated message builders),
+ * however, can also hold a reference to the builder object (a nested
+ * builder) for the field.
+ * <p>
+ * If the field is already backed by a nested builder, the nested builder
+ * will be returned. Otherwise, a new field builder will be created and
+ * returned. The original message field (if it exists) will be merged into
+ * the field builder, which will then be nested into its parent builder.
+ * <p>
+ * NOTE: implementations that do not support nested builders will throw
+ * {@code UnsupportedOperationException}.
+ */
+ Builder getFieldBuilder(Descriptors.FieldDescriptor field);
+
+ /**
* Sets a field to the given value. The value must be of the correct type
* for this field, i.e. the same type that
* {@link Message#getField(Descriptors.FieldDescriptor)} would return.
diff --git a/java/src/main/java/com/google/protobuf/MessageLite.java b/java/src/main/java/com/google/protobuf/MessageLite.java
index 31b8256e..e5b9a47b 100644
--- a/java/src/main/java/com/google/protobuf/MessageLite.java
+++ b/java/src/main/java/com/google/protobuf/MessageLite.java
@@ -79,6 +79,12 @@ public interface MessageLite extends MessageLiteOrBuilder {
*/
int getSerializedSize();
+
+ /**
+ * Gets the parser for a message of the same type as this message.
+ */
+ Parser<? extends MessageLite> getParserForType();
+
// -----------------------------------------------------------------
// Convenience methods.
@@ -144,11 +150,8 @@ public interface MessageLite extends MessageLiteOrBuilder {
Builder clear();
/**
- * Construct the final message. Once this is called, the Builder is no
- * longer valid, and calling any other method will result in undefined
- * behavior and may throw a NullPointerException. If you need to continue
- * working with the builder after calling {@code build()}, {@code clone()}
- * it first.
+ * Constructs the message based on the state of the Builder. Subsequent
+ * changes to the Builder will not affect the returned message.
* @throws UninitializedMessageException The message is missing one or more
* required fields (i.e. {@link #isInitialized()} returns false).
* Use {@link #buildPartial()} to bypass this check.
@@ -158,11 +161,7 @@ public interface MessageLite extends MessageLiteOrBuilder {
/**
* Like {@link #build()}, but does not throw an exception if the message
* is missing required fields. Instead, a partial message is returned.
- * Once this is called, the Builder is no longer valid, and calling any
- * will result in undefined behavior and may throw a NullPointerException.
- *
- * If you need to continue working with the builder after calling
- * {@code buildPartial()}, {@code clone()} it first.
+ * Subsequent changes to the Builder will not affect the returned message.
*/
MessageLite buildPartial();
@@ -174,7 +173,7 @@ public interface MessageLite extends MessageLiteOrBuilder {
/**
* Parses a message of this type from the input and merges it with this
- * message, as if using {@link Builder#mergeFrom(MessageLite)}.
+ * message.
*
* <p>Warning: This does not verify that all required fields are present in
* the input message. If you call {@link #build()} without setting all
@@ -184,11 +183,6 @@ public interface MessageLite extends MessageLiteOrBuilder {
* <ul>
* <li>Call {@link #isInitialized()} to verify that all required fields
* are set before building.
- * <li>Parse the message separately using one of the static
- * {@code parseFrom} methods, then use {@link #mergeFrom(MessageLite)}
- * to merge it with this one. {@code parseFrom} will throw an
- * {@link InvalidProtocolBufferException} (an {@code IOException})
- * if some required fields are missing.
* <li>Use {@code buildPartial()} to build, which ignores missing
* required fields.
* </ul>
@@ -225,7 +219,7 @@ public interface MessageLite extends MessageLiteOrBuilder {
/**
* Parse {@code data} as a message of this type and merge it with the
* message being built. This is just a small wrapper around
- * {@link #mergeFrom(CodedInputStream,ExtensionRegistry)}.
+ * {@link #mergeFrom(CodedInputStream,ExtensionRegistryLite)}.
*
* @return this
*/
@@ -255,7 +249,7 @@ public interface MessageLite extends MessageLiteOrBuilder {
/**
* Parse {@code data} as a message of this type and merge it with the
* message being built. This is just a small wrapper around
- * {@link #mergeFrom(CodedInputStream,ExtensionRegistry)}.
+ * {@link #mergeFrom(CodedInputStream,ExtensionRegistryLite)}.
*
* @return this
*/
@@ -266,7 +260,7 @@ public interface MessageLite extends MessageLiteOrBuilder {
/**
* Parse {@code data} as a message of this type and merge it with the
* message being built. This is just a small wrapper around
- * {@link #mergeFrom(CodedInputStream,ExtensionRegistry)}.
+ * {@link #mergeFrom(CodedInputStream,ExtensionRegistryLite)}.
*
* @return this
*/
@@ -293,7 +287,7 @@ public interface MessageLite extends MessageLiteOrBuilder {
/**
* Parse a message of this type from {@code input} and merge it with the
* message being built. This is just a small wrapper around
- * {@link #mergeFrom(CodedInputStream,ExtensionRegistry)}.
+ * {@link #mergeFrom(CodedInputStream,ExtensionRegistryLite)}.
*
* @return this
*/
@@ -308,9 +302,9 @@ public interface MessageLite extends MessageLiteOrBuilder {
* {@link MessageLite#writeDelimitedTo(OutputStream)} to write messages in
* this format.
*
- * @returns True if successful, or false if the stream is at EOF when the
- * method starts. Any other error (including reaching EOF during
- * parsing) will cause an exception to be thrown.
+ * @return True if successful, or false if the stream is at EOF when the
+ * method starts. Any other error (including reaching EOF during
+ * parsing) will cause an exception to be thrown.
*/
boolean mergeDelimitedFrom(InputStream input)
throws IOException;
diff --git a/java/src/main/java/com/google/protobuf/MessageLiteOrBuilder.java b/java/src/main/java/com/google/protobuf/MessageLiteOrBuilder.java
index 7cc72e9c..05b2b161 100644
--- a/java/src/main/java/com/google/protobuf/MessageLiteOrBuilder.java
+++ b/java/src/main/java/com/google/protobuf/MessageLiteOrBuilder.java
@@ -52,6 +52,8 @@ public interface MessageLiteOrBuilder {
/**
* Returns true if all required fields in the message and all embedded
* messages are set, false otherwise.
+ *
+ * <p>See also: {@link MessageOrBuilder#getInitializationErrorString()}
*/
boolean isInitialized();
diff --git a/java/src/main/java/com/google/protobuf/MessageOrBuilder.java b/java/src/main/java/com/google/protobuf/MessageOrBuilder.java
index 0132e7ca..bf62d45e 100644
--- a/java/src/main/java/com/google/protobuf/MessageOrBuilder.java
+++ b/java/src/main/java/com/google/protobuf/MessageOrBuilder.java
@@ -30,6 +30,7 @@
package com.google.protobuf;
+import java.util.List;
import java.util.Map;
/**
@@ -45,6 +46,24 @@ public interface MessageOrBuilder extends MessageLiteOrBuilder {
Message getDefaultInstanceForType();
/**
+ * Returns a list of field paths (e.g. "foo.bar.baz") of required fields
+ * which are not set in this message. You should call
+ * {@link MessageLiteOrBuilder#isInitialized()} first to check if there
+ * are any missing fields, as that method is likely to be much faster
+ * than this one even when the message is fully-initialized.
+ */
+ List<String> findInitializationErrors();
+
+ /**
+ * Returns a comma-delimited list of required fields which are not set
+ * in this message object. You should call
+ * {@link MessageLiteOrBuilder#isInitialized()} first to check if there
+ * are any missing fields, as that method is likely to be much faster
+ * than this one even when the message is fully-initialized.
+ */
+ String getInitializationErrorString();
+
+ /**
* Get the message's type's descriptor. This differs from the
* {@code getDescriptor()} method of generated message classes in that
* this method is an abstract method of the {@code Message} interface
@@ -80,7 +99,7 @@ public interface MessageOrBuilder extends MessageLiteOrBuilder {
/**
* Obtains the value of the given field, or the default value if it is
* not set. For primitive fields, the boxed primitive value is returned.
- * For enum fields, the EnumValueDescriptor for the value is returend. For
+ * For enum fields, the EnumValueDescriptor for the value is returned. For
* embedded message fields, the sub-message is returned. For repeated
* fields, a java.util.List is returned.
*/
@@ -98,7 +117,7 @@ public interface MessageOrBuilder extends MessageLiteOrBuilder {
/**
* Gets an element of a repeated field. For primitive fields, the boxed
* primitive value is returned. For enum fields, the EnumValueDescriptor
- * for the value is returend. For embedded message fields, the sub-message
+ * for the value is returned. For embedded message fields, the sub-message
* is returned.
* @throws IllegalArgumentException The field is not a repeated field, or
* {@code field.getContainingType() != getDescriptorForType()}.
diff --git a/java/src/main/java/com/google/protobuf/Parser.java b/java/src/main/java/com/google/protobuf/Parser.java
new file mode 100644
index 00000000..7d8e8217
--- /dev/null
+++ b/java/src/main/java/com/google/protobuf/Parser.java
@@ -0,0 +1,259 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import java.io.InputStream;
+
+/**
+ * Abstract interface for parsing Protocol Messages.
+ *
+ * @author liujisi@google.com (Pherl Liu)
+ */
+public interface Parser<MessageType> {
+ /**
+ * Parses a message of {@code MessageType} from the input.
+ *
+ * <p>Note: The caller should call
+ * {@link CodedInputStream#checkLastTagWas(int)} after calling this to
+ * verify that the last tag seen was the appropriate end-group tag,
+ * or zero for EOF.
+ */
+ public MessageType parseFrom(CodedInputStream input)
+ throws InvalidProtocolBufferException;
+
+ /**
+ * Like {@link #parseFrom(CodedInputStream)}, but also parses extensions.
+ * The extensions that you want to be able to parse must be registered in
+ * {@code extensionRegistry}. Extensions not in the registry will be treated
+ * as unknown fields.
+ */
+ public MessageType parseFrom(CodedInputStream input,
+ ExtensionRegistryLite extensionRegistry)
+ throws InvalidProtocolBufferException;
+
+ /**
+ * Like {@link #parseFrom(CodedInputStream)}, but does not throw an
+ * exception if the message is missing required fields. Instead, a partial
+ * message is returned.
+ */
+ public MessageType parsePartialFrom(CodedInputStream input)
+ throws InvalidProtocolBufferException;
+
+ /**
+ * Like {@link #parseFrom(CodedInputStream, ExtensionRegistryLite)},
+ * but does not throw an exception if the message is missing required fields.
+ * Instead, a partial message is returned.
+ */
+ public MessageType parsePartialFrom(CodedInputStream input,
+ ExtensionRegistryLite extensionRegistry)
+ throws InvalidProtocolBufferException;
+
+ // ---------------------------------------------------------------
+ // Convenience methods.
+
+ /**
+ * Parses {@code data} as a message of {@code MessageType}.
+ * This is just a small wrapper around {@link #parseFrom(CodedInputStream)}.
+ */
+ public MessageType parseFrom(ByteString data)
+ throws InvalidProtocolBufferException;
+
+ /**
+ * Parses {@code data} as a message of {@code MessageType}.
+ * This is just a small wrapper around
+ * {@link #parseFrom(CodedInputStream, ExtensionRegistryLite)}.
+ */
+ public MessageType parseFrom(ByteString data,
+ ExtensionRegistryLite extensionRegistry)
+ throws InvalidProtocolBufferException;
+
+ /**
+ * Like {@link #parseFrom(ByteString)}, but does not throw an
+ * exception if the message is missing required fields. Instead, a partial
+ * message is returned.
+ */
+ public MessageType parsePartialFrom(ByteString data)
+ throws InvalidProtocolBufferException;
+
+ /**
+ * Like {@link #parseFrom(ByteString, ExtensionRegistryLite)},
+ * but does not throw an exception if the message is missing required fields.
+ * Instead, a partial message is returned.
+ */
+ public MessageType parsePartialFrom(ByteString data,
+ ExtensionRegistryLite extensionRegistry)
+ throws InvalidProtocolBufferException;
+
+ /**
+ * Parses {@code data} as a message of {@code MessageType}.
+ * This is just a small wrapper around {@link #parseFrom(CodedInputStream)}.
+ */
+ public MessageType parseFrom(byte[] data, int off, int len)
+ throws InvalidProtocolBufferException;
+
+ /**
+ * Parses {@code data} as a message of {@code MessageType}.
+ * This is just a small wrapper around
+ * {@link #parseFrom(CodedInputStream, ExtensionRegistryLite)}.
+ */
+ public MessageType parseFrom(byte[] data, int off, int len,
+ ExtensionRegistryLite extensionRegistry)
+ throws InvalidProtocolBufferException;
+
+ /**
+ * Parses {@code data} as a message of {@code MessageType}.
+ * This is just a small wrapper around {@link #parseFrom(CodedInputStream)}.
+ */
+ public MessageType parseFrom(byte[] data)
+ throws InvalidProtocolBufferException;
+
+ /**
+ * Parses {@code data} as a message of {@code MessageType}.
+ * This is just a small wrapper around
+ * {@link #parseFrom(CodedInputStream, ExtensionRegistryLite)}.
+ */
+ public MessageType parseFrom(byte[] data,
+ ExtensionRegistryLite extensionRegistry)
+ throws InvalidProtocolBufferException;
+
+ /**
+ * Like {@link #parseFrom(byte[], int, int)}, but does not throw an
+ * exception if the message is missing required fields. Instead, a partial
+ * message is returned.
+ */
+ public MessageType parsePartialFrom(byte[] data, int off, int len)
+ throws InvalidProtocolBufferException;
+
+ /**
+ * Like {@link #parseFrom(byte[], int, int, ExtensionRegistryLite)},
+ * but does not throw an exception if the message is missing required fields.
+ * Instead, a partial message is returned.
+ */
+ public MessageType parsePartialFrom(byte[] data, int off, int len,
+ ExtensionRegistryLite extensionRegistry)
+ throws InvalidProtocolBufferException;
+
+ /**
+ * Like {@link #parseFrom(byte[])}, but does not throw an
+ * exception if the message is missing required fields. Instead, a partial
+ * message is returned.
+ */
+ public MessageType parsePartialFrom(byte[] data)
+ throws InvalidProtocolBufferException;
+
+ /**
+ * Like {@link #parseFrom(byte[], ExtensionRegistryLite)},
+ * but does not throw an exception if the message is missing required fields.
+ * Instead, a partial message is returned.
+ */
+ public MessageType parsePartialFrom(byte[] data,
+ ExtensionRegistryLite extensionRegistry)
+ throws InvalidProtocolBufferException;
+
+ /**
+ * Parse a message of {@code MessageType} from {@code input}.
+ * This is just a small wrapper around {@link #parseFrom(CodedInputStream)}.
+ * Note that this method always reads the <i>entire</i> input (unless it
+ * throws an exception). If you want it to stop earlier, you will need to
+ * wrap your input in some wrapper stream that limits reading. Or, use
+ * {@link MessageLite#writeDelimitedTo(java.io.OutputStream)} to write your
+ * message and {@link #parseDelimitedFrom(InputStream)} to read it.
+ * <p>
+ * Despite usually reading the entire input, this does not close the stream.
+ */
+ public MessageType parseFrom(InputStream input)
+ throws InvalidProtocolBufferException;
+
+ /**
+ * Parses a message of {@code MessageType} from {@code input}.
+ * This is just a small wrapper around
+ * {@link #parseFrom(CodedInputStream, ExtensionRegistryLite)}.
+ */
+ public MessageType parseFrom(InputStream input,
+ ExtensionRegistryLite extensionRegistry)
+ throws InvalidProtocolBufferException;
+
+ /**
+ * Like {@link #parseFrom(InputStream)}, but does not throw an
+ * exception if the message is missing required fields. Instead, a partial
+ * message is returned.
+ */
+ public MessageType parsePartialFrom(InputStream input)
+ throws InvalidProtocolBufferException;
+
+ /**
+ * Like {@link #parseFrom(InputStream, ExtensionRegistryLite)},
+ * but does not throw an exception if the message is missing required fields.
+ * Instead, a partial message is returned.
+ */
+ public MessageType parsePartialFrom(InputStream input,
+ ExtensionRegistryLite extensionRegistry)
+ throws InvalidProtocolBufferException;
+
+ /**
+ * Like {@link #parseFrom(InputStream)}, but does not read until EOF.
+ * Instead, the size of message (encoded as a varint) is read first,
+ * then the message data. Use
+ * {@link MessageLite#writeDelimitedTo(java.io.OutputStream)} to write
+ * messages in this format.
+ *
+ * @return Parsed message if successful, or null if the stream is at EOF when
+ * the method starts. Any other error (including reaching EOF during
+ * parsing) will cause an exception to be thrown.
+ */
+ public MessageType parseDelimitedFrom(InputStream input)
+ throws InvalidProtocolBufferException;
+
+ /**
+ * Like {@link #parseDelimitedFrom(InputStream)} but supporting extensions.
+ */
+ public MessageType parseDelimitedFrom(InputStream input,
+ ExtensionRegistryLite extensionRegistry)
+ throws InvalidProtocolBufferException;
+
+ /**
+ * Like {@link #parseDelimitedFrom(InputStream)}, but does not throw an
+ * exception if the message is missing required fields. Instead, a partial
+ * message is returned.
+ */
+ public MessageType parsePartialDelimitedFrom(InputStream input)
+ throws InvalidProtocolBufferException;
+
+ /**
+ * Like {@link #parseDelimitedFrom(InputStream, ExtensionRegistryLite)},
+ * but does not throw an exception if the message is missing required fields.
+ * Instead, a partial message is returned.
+ */
+ public MessageType parsePartialDelimitedFrom(
+ InputStream input,
+ ExtensionRegistryLite extensionRegistry)
+ throws InvalidProtocolBufferException;
+}
diff --git a/java/src/main/java/com/google/protobuf/RepeatedFieldBuilder.java b/java/src/main/java/com/google/protobuf/RepeatedFieldBuilder.java
index 0024f791..65d9270d 100644
--- a/java/src/main/java/com/google/protobuf/RepeatedFieldBuilder.java
+++ b/java/src/main/java/com/google/protobuf/RepeatedFieldBuilder.java
@@ -37,22 +37,22 @@ import java.util.Collections;
import java.util.List;
/**
- * <code>RepeatedFieldBuilder</code> implements a structure that a protocol
+ * {@code RepeatedFieldBuilder} implements a structure that a protocol
* message uses to hold a repeated field of other protocol messages. It supports
* the classical use case of adding immutable {@link Message}'s to the
* repeated field and is highly optimized around this (no extra memory
* allocations and sharing of immutable arrays).
* <br>
* It also supports the additional use case of adding a {@link Message.Builder}
- * to the repeated field and deferring conversion of that <code>Builder</code>
- * to an immutable <code>Message</code>. In this way, it's possible to maintain
- * a tree of <code>Builder</code>'s that acts as a fully read/write data
+ * to the repeated field and deferring conversion of that {@code Builder}
+ * to an immutable {@code Message}. In this way, it's possible to maintain
+ * a tree of {@code Builder}'s that acts as a fully read/write data
* structure.
* <br>
* Logically, one can think of a tree of builders as converting the entire tree
* to messages when build is called on the root or when any method is called
* that desires a Message instead of a Builder. In terms of the implementation,
- * the <code>SingleFieldBuilder</code> and <code>RepeatedFieldBuilder</code>
+ * the {@code SingleFieldBuilder} and {@code RepeatedFieldBuilder}
* classes cache messages that were created so that messages only need to be
* created when some change occured in its builder or a builder for one of its
* descendants.
@@ -192,7 +192,7 @@ public class RepeatedFieldBuilder
/**
* Get the message at the specified index. If the message is currently stored
- * as a <code>Builder</code>, it is converted to a <code>Message</code> by
+ * as a {@code Builder}, it is converted to a {@code Message} by
* calling {@link Message.Builder#buildPartial} on it.
*
* @param index the index of the message to get
@@ -204,7 +204,7 @@ public class RepeatedFieldBuilder
/**
* Get the message at the specified index. If the message is currently stored
- * as a <code>Builder</code>, it is converted to a <code>Message</code> by
+ * as a {@code Builder}, it is converted to a {@code Message} by
* calling {@link Message.Builder#buildPartial} on it.
*
* @param index the index of the message to get
diff --git a/java/src/main/java/com/google/protobuf/RopeByteString.java b/java/src/main/java/com/google/protobuf/RopeByteString.java
new file mode 100644
index 00000000..8d44d117
--- /dev/null
+++ b/java/src/main/java/com/google/protobuf/RopeByteString.java
@@ -0,0 +1,945 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
+import java.io.ByteArrayInputStream;
+import java.nio.ByteBuffer;
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Deque;
+import java.util.Iterator;
+import java.util.List;
+import java.util.NoSuchElementException;
+
+/**
+ * Class to represent {@code ByteStrings} formed by concatenation of other
+ * ByteStrings, without copying the data in the pieces. The concatenation is
+ * represented as a tree whose leaf nodes are each a {@link LiteralByteString}.
+ *
+ * <p>Most of the operations here are inspired by the now-famous paper <a
+ * href="http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf">
+ * BAP95</a>, "Ropes: an Alternative to Strings" by Hans-J. Boehm, Russ
+ * Atkinson and Michael Plass.
+ *
+ * <p>The algorithms described in the paper have been implemented for character
+ * strings in {@link com.google.common.string.Rope} and in the c++ class {@code
+ * cord.cc}.
+ *
+ * <p>Fundamentally the Rope algorithm represents the collection of pieces as a
+ * binary tree. BAP95 uses a Fibonacci bound relating depth to a minimum
+ * sequence length; sequences that are too short relative to their depth cause a
+ * tree rebalance. More precisely, a tree of depth d is "balanced" in the
+ * terminology of BAP95 if its length is at least F(d+2), where F(n) is the
+ * n-th Fibonacci number. Thus for depths 0, 1, 2, 3, 4, 5,... we have minimum
+ * lengths 1, 2, 3, 5, 8, 13,...
+ *
+ * @author carlanton@google.com (Carl Haverl)
+ */
+class RopeByteString extends ByteString {
+
+ /**
+ * BAP95. Let F(n) be the nth Fibonacci number. A {@link RopeByteString} of
+ * depth n is "balanced", i.e. flat enough, if its length is at least F(n+2),
+ * e.g. a "balanced" {@link RopeByteString} of depth 1 must have length at
+ * least 2, of depth 4 must have length >= 8, etc.
+ *
+ * <p>There's nothing special about using the Fibonacci numbers for this, but
+ * they are a reasonable sequence for encapsulating the idea that we are OK
+ * with longer strings being encoded in deeper binary trees.
+ *
+ * <p>For 32-bit integers, this array has length 46.
+ */
+ private static final int[] minLengthByDepth;
+
+ static {
+ // Dynamically generate the list of Fibonacci numbers the first time this
+ // class is accessed.
+ List<Integer> numbers = new ArrayList<Integer>();
+
+ // we skip the first Fibonacci number (1). So instead of: 1 1 2 3 5 8 ...
+ // we have: 1 2 3 5 8 ...
+ int f1 = 1;
+ int f2 = 1;
+
+ // get all the values until we roll over.
+ while (f2 > 0) {
+ numbers.add(f2);
+ int temp = f1 + f2;
+ f1 = f2;
+ f2 = temp;
+ }
+
+ // we include this here so that we can index this array to [x + 1] in the
+ // loops below.
+ numbers.add(Integer.MAX_VALUE);
+ minLengthByDepth = new int[numbers.size()];
+ for (int i = 0; i < minLengthByDepth.length; i++) {
+ // unbox all the values
+ minLengthByDepth[i] = numbers.get(i);
+ }
+ }
+
+ private final int totalLength;
+ private final ByteString left;
+ private final ByteString right;
+ private final int leftLength;
+ private final int treeDepth;
+
+ /**
+ * Create a new RopeByteString, which can be thought of as a new tree node, by
+ * recording references to the two given strings.
+ *
+ * @param left string on the left of this node, should have {@code size() >
+ * 0}
+ * @param right string on the right of this node, should have {@code size() >
+ * 0}
+ */
+ private RopeByteString(ByteString left, ByteString right) {
+ this.left = left;
+ this.right = right;
+ leftLength = left.size();
+ totalLength = leftLength + right.size();
+ treeDepth = Math.max(left.getTreeDepth(), right.getTreeDepth()) + 1;
+ }
+
+ /**
+ * Concatenate the given strings while performing various optimizations to
+ * slow the growth rate of tree depth and tree node count. The result is
+ * either a {@link LiteralByteString} or a {@link RopeByteString}
+ * depending on which optimizations, if any, were applied.
+ *
+ * <p>Small pieces of length less than {@link
+ * ByteString#CONCATENATE_BY_COPY_SIZE} may be copied by value here, as in
+ * BAP95. Large pieces are referenced without copy.
+ *
+ * @param left string on the left
+ * @param right string on the right
+ * @return concatenation representing the same sequence as the given strings
+ */
+ static ByteString concatenate(ByteString left, ByteString right) {
+ ByteString result;
+ RopeByteString leftRope =
+ (left instanceof RopeByteString) ? (RopeByteString) left : null;
+ if (right.size() == 0) {
+ result = left;
+ } else if (left.size() == 0) {
+ result = right;
+ } else {
+ int newLength = left.size() + right.size();
+ if (newLength < ByteString.CONCATENATE_BY_COPY_SIZE) {
+ // Optimization from BAP95: For short (leaves in paper, but just short
+ // here) total length, do a copy of data to a new leaf.
+ result = concatenateBytes(left, right);
+ } else if (leftRope != null
+ && leftRope.right.size() + right.size() < CONCATENATE_BY_COPY_SIZE) {
+ // Optimization from BAP95: As an optimization of the case where the
+ // ByteString is constructed by repeated concatenate, recognize the case
+ // where a short string is concatenated to a left-hand node whose
+ // right-hand branch is short. In the paper this applies to leaves, but
+ // we just look at the length here. This has the advantage of shedding
+ // references to unneeded data when substrings have been taken.
+ //
+ // When we recognize this case, we do a copy of the data and create a
+ // new parent node so that the depth of the result is the same as the
+ // given left tree.
+ ByteString newRight = concatenateBytes(leftRope.right, right);
+ result = new RopeByteString(leftRope.left, newRight);
+ } else if (leftRope != null
+ && leftRope.left.getTreeDepth() > leftRope.right.getTreeDepth()
+ && leftRope.getTreeDepth() > right.getTreeDepth()) {
+ // Typically for concatenate-built strings the left-side is deeper than
+ // the right. This is our final attempt to concatenate without
+ // increasing the tree depth. We'll redo the node on the RHS. This
+ // is yet another optimization for building the string by repeatedly
+ // concatenating on the right.
+ ByteString newRight = new RopeByteString(leftRope.right, right);
+ result = new RopeByteString(leftRope.left, newRight);
+ } else {
+ // Fine, we'll add a node and increase the tree depth--unless we
+ // rebalance ;^)
+ int newDepth = Math.max(left.getTreeDepth(), right.getTreeDepth()) + 1;
+ if (newLength >= minLengthByDepth[newDepth]) {
+ // The tree is shallow enough, so don't rebalance
+ result = new RopeByteString(left, right);
+ } else {
+ result = new Balancer().balance(left, right);
+ }
+ }
+ }
+ return result;
+ }
+
+ /**
+ * Concatenates two strings by copying data values. This is called in a few
+ * cases in order to reduce the growth of the number of tree nodes.
+ *
+ * @param left string on the left
+ * @param right string on the right
+ * @return string formed by copying data bytes
+ */
+ private static LiteralByteString concatenateBytes(ByteString left,
+ ByteString right) {
+ int leftSize = left.size();
+ int rightSize = right.size();
+ byte[] bytes = new byte[leftSize + rightSize];
+ left.copyTo(bytes, 0, 0, leftSize);
+ right.copyTo(bytes, 0, leftSize, rightSize);
+ return new LiteralByteString(bytes); // Constructor wraps bytes
+ }
+
+ /**
+  * Test-only factory that calls the constructor directly, skipping every
+  * safeguard applied by {@link #concatenate(ByteString, ByteString)}. Tests
+  * use it to build trees of a specific structure, including trees with empty
+  * leaves (which are otherwise dis-allowed), to make sure the implementation
+  * can withstand their presence.
+  *
+  * @param left string on the left of this node
+  * @param right string on the right of this node
+  * @return an unsafe instance for testing only
+  */
+ static RopeByteString newInstanceForTest(ByteString left, ByteString right) {
+   RopeByteString unchecked = new RopeByteString(left, right);
+   return unchecked;
+ }
+
+ /**
+  * Gets the byte at the given index.
+  * Throws {@link ArrayIndexOutOfBoundsException} for backwards-compatibility
+  * reasons although it would more properly be {@link
+  * IndexOutOfBoundsException}.
+  *
+  * @param index index of byte
+  * @return the value
+  * @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size
+  */
+ @Override
+ public byte byteAt(int index) {
+   if (index < 0) {
+     throw new ArrayIndexOutOfBoundsException("Index < 0: " + index);
+   }
+   // Valid indices are 0 .. totalLength - 1, so index == totalLength must be
+   // rejected here too rather than being passed down to the right child.
+   if (index >= totalLength) {
+     throw new ArrayIndexOutOfBoundsException(
+         "Index > length: " + index + ", " + totalLength);
+   }
+
+   // Find the relevant piece by recursive descent
+   if (index < leftLength) {
+     return left.byteAt(index);
+   }
+   return right.byteAt(index - leftLength);
+ }
+
+ /** Returns the stored total length of this rope. */
+ @Override
+ public int size() {
+   return this.totalLength;
+ }
+
+ // =================================================================
+ // Pieces
+
+ /** Returns the stored depth of the tree rooted at this node. */
+ @Override
+ protected int getTreeDepth() {
+   return this.treeDepth;
+ }
+
+ /**
+ * Determines if the tree is balanced according to BAP95, which means the tree
+ * is flat-enough with respect to the bounds. Note that this definition of
+ * balanced is one where sub-trees of balanced trees are not necessarily
+ * balanced.
+ *
+ * @return true if the tree is balanced
+ */
+ @Override
+ protected boolean isBalanced() {
+ return totalLength >= minLengthByDepth[treeDepth];
+ }
+
+ /**
+  * Takes a substring of this one. This involves recursive descent along the
+  * left and right edges of the substring, and referencing any wholly contained
+  * segments in between. Any leaf nodes entirely uninvolved in the substring
+  * will not be referenced by the substring.
+  *
+  * <p>Substrings of {@code length < 2} should result in at most a single
+  * recursive call chain, terminating at a leaf node, so the result is expected
+  * to be a leaf rather than a new {@link RopeByteString}.
+  *
+  * @param beginIndex start at this index
+  * @param endIndex the last character is the one before this index
+  * @return substring leaf node or tree
+  * @throws IndexOutOfBoundsException if {@code beginIndex < 0},
+  *     {@code endIndex > size()} or {@code beginIndex > endIndex}
+  */
+ @Override
+ public ByteString substring(int beginIndex, int endIndex) {
+   if (beginIndex < 0) {
+     throw new IndexOutOfBoundsException(
+         "Beginning index: " + beginIndex + " < 0");
+   }
+   if (endIndex > totalLength) {
+     throw new IndexOutOfBoundsException(
+         "End index: " + endIndex + " > " + totalLength);
+   }
+   final int span = endIndex - beginIndex;
+   if (span < 0) {
+     throw new IndexOutOfBoundsException(
+         "Beginning index larger than ending index: " + beginIndex + ", "
+         + endIndex);
+   }
+
+   if (span == 0) {
+     // Empty substring
+     return ByteString.EMPTY;
+   }
+   if (span == totalLength) {
+     // The whole string
+     return this;
+   }
+
+   // Proper substring
+   if (endIndex <= leftLength) {
+     // Entirely within the left child
+     return left.substring(beginIndex, endIndex);
+   }
+   if (beginIndex >= leftLength) {
+     // Entirely within the right child
+     return right.substring(beginIndex - leftLength, endIndex - leftLength);
+   }
+
+   // The range straddles both children: take the tail of the left and the
+   // head of the right. Intentionally not rebalancing, since in many cases
+   // these two substrings will already be less deep than the top-level
+   // RopeByteString we're taking a substring of.
+   ByteString leftTail = left.substring(beginIndex);
+   ByteString rightHead = right.substring(0, endIndex - leftLength);
+   return new RopeByteString(leftTail, rightHead);
+ }
+
+ // =================================================================
+ // ByteString -> byte[]
+
+ @Override
+ protected void copyToInternal(byte[] target, int sourceOffset,
+ int targetOffset, int numberToCopy) {
+ if (sourceOffset + numberToCopy <= leftLength) {
+ left.copyToInternal(target, sourceOffset, targetOffset, numberToCopy);
+ } else if (sourceOffset >= leftLength) {
+ right.copyToInternal(target, sourceOffset - leftLength, targetOffset,
+ numberToCopy);
+ } else {
+ int leftLength = this.leftLength - sourceOffset;
+ left.copyToInternal(target, sourceOffset, targetOffset, leftLength);
+ right.copyToInternal(target, 0, targetOffset + leftLength,
+ numberToCopy - leftLength);
+ }
+ }
+
+ /** Copies the left child into {@code target}, then the right child. */
+ @Override
+ public void copyTo(ByteBuffer target) {
+   this.left.copyTo(target);
+   this.right.copyTo(target);
+ }
+
+ /**
+  * Returns a read-only buffer wrapping the array produced by
+  * {@code toByteArray()}.
+  */
+ @Override
+ public ByteBuffer asReadOnlyByteBuffer() {
+   return ByteBuffer.wrap(toByteArray()).asReadOnlyBuffer();
+ }
+
+ /**
+  * Returns one read-only buffer per piece yielded by a {@link PieceIterator}
+  * over this rope, in iteration order.
+  */
+ @Override
+ public List<ByteBuffer> asReadOnlyByteBufferList() {
+   List<ByteBuffer> buffers = new ArrayList<ByteBuffer>();
+   // Visit every LiteralByteString that makes up this rope.
+   for (PieceIterator leaves = new PieceIterator(this); leaves.hasNext(); ) {
+     buffers.add(leaves.next().asReadOnlyByteBuffer());
+   }
+   return buffers;
+ }
+
+ /** Writes the left child to {@code outputStream}, then the right child. */
+ @Override
+ public void writeTo(OutputStream outputStream) throws IOException {
+   this.left.writeTo(outputStream);
+   this.right.writeTo(outputStream);
+ }
+
+ @Override
+ public String toString(String charsetName)
+ throws UnsupportedEncodingException {
+ return new String(toByteArray(), charsetName);
+ }
+
+ // =================================================================
+ // UTF-8 decoding
+
+ /**
+  * Validates the left child starting from {@code Utf8.COMPLETE}, feeds the
+  * resulting state into validation of the right child, and reports whether
+  * the final state is {@code Utf8.COMPLETE}.
+  */
+ @Override
+ public boolean isValidUtf8() {
+   final int stateAfterLeft =
+       left.partialIsValidUtf8(Utf8.COMPLETE, 0, leftLength);
+   final int finalState =
+       right.partialIsValidUtf8(stateAfterLeft, 0, right.size());
+   return finalState == Utf8.COMPLETE;
+ }
+
+ @Override
+ protected int partialIsValidUtf8(int state, int offset, int length) {
+ int toIndex = offset + length;
+ if (toIndex <= leftLength) {
+ return left.partialIsValidUtf8(state, offset, length);
+ } else if (offset >= leftLength) {
+ return right.partialIsValidUtf8(state, offset - leftLength, length);
+ } else {
+ int leftLength = this.leftLength - offset;
+ int leftPartial = left.partialIsValidUtf8(state, offset, leftLength);
+ return right.partialIsValidUtf8(leftPartial, 0, length - leftLength);
+ }
+ }
+
+ // =================================================================
+ // equals() and hashCode()
+
+ /**
+  * Compares this rope with another object. Cheap checks (identity, type,
+  * length, already-cached hash codes) run first; only if they are
+  * inconclusive are the contents compared piece by piece.
+  */
+ @Override
+ public boolean equals(Object other) {
+   if (other == this) {
+     return true;
+   }
+   if (!(other instanceof ByteString)) {
+     return false;
+   }
+
+   final ByteString that = (ByteString) other;
+   if (totalLength != that.size()) {
+     return false;
+   }
+   if (totalLength == 0) {
+     return true;
+   }
+
+   // You don't really want to be calling equals on long strings, but since
+   // the hashCode is cached, inequality is effectively cached too. Only
+   // hash codes that have already been computed are consulted; computing one
+   // here is arguably worthwhile when many strings are being compared, but
+   // it is not done.
+   if (hash != 0) {
+     final int thatCachedHash = that.peekCachedHashCode();
+     if (thatCachedHash != 0 && hash != thatCachedHash) {
+       return false;
+     }
+   }
+
+   return equalsFragments(that);
+ }
+
+ /**
+ * Determines if this string is equal to another of the same length by
+ * iterating over the leaf nodes. On each step of the iteration, the
+ * overlapping segments of the leaves are compared.
+ *
+ * @param other string of the same length as this one
+ * @return true if the values of this string equals the value of the given
+ * one
+ */
+ private boolean equalsFragments(ByteString other) {
+ int thisOffset = 0;
+ Iterator<LiteralByteString> thisIter = new PieceIterator(this);
+ LiteralByteString thisString = thisIter.next();
+
+ int thatOffset = 0;
+ Iterator<LiteralByteString> thatIter = new PieceIterator(other);
+ LiteralByteString thatString = thatIter.next();
+
+ int pos = 0;
+ while (true) {
+ int thisRemaining = thisString.size() - thisOffset;
+ int thatRemaining = thatString.size() - thatOffset;
+ int bytesToCompare = Math.min(thisRemaining, thatRemaining);
+
+ // At least one of the offsets will be zero
+ boolean stillEqual = (thisOffset == 0)
+ ? thisString.equalsRange(thatString, thatOffset, bytesToCompare)
+ : thatString.equalsRange(thisString, thisOffset, bytesToCompare);
+ if (!stillEqual) {
+ return false;
+ }
+
+ pos += bytesToCompare;
+ if (pos >= totalLength) {
+ if (pos == totalLength) {
+ return true;
+ }
+ throw new IllegalStateException();
+ }
+ // We always get to the end of at least one of the pieces
+ if (bytesToCompare == thisRemaining) { // If reached end of this
+ thisOffset = 0;
+ thisString = thisIter.next();
+ } else {
+ thisOffset += bytesToCompare;
+ }
+ if (bytesToCompare == thatRemaining) { // If reached end of that
+ thatOffset = 0;
+ thatString = thatIter.next();
+ } else {
+ thatOffset += bytesToCompare;
+ }
+ }
+ }
+
+ /**
+  * Cached hash value. Intentionally accessed via a data race, which is safe
+  * because of the Java Memory Model's "no out-of-thin-air values" guarantees
+  * for ints.
+  */
+ private int hash = 0;
+
+ /**
+  * Returns the hash code, computing and caching it on first use. Zero is
+  * reserved to mean "not yet computed", so a computed value of zero is
+  * stored and returned as 1.
+  */
+ @Override
+ public int hashCode() {
+   final int cached = hash;
+   if (cached != 0) {
+     return cached;
+   }
+
+   // The hash is seeded with the total length.
+   int computed = partialHash(totalLength, 0, totalLength);
+   if (computed == 0) {
+     computed = 1;
+   }
+   hash = computed;
+   return computed;
+ }
+
+ /** Returns the cached hash field without computing it (0 if not yet set). */
+ @Override
+ protected int peekCachedHashCode() {
+   return this.hash;
+ }
+
+ @Override
+ protected int partialHash(int h, int offset, int length) {
+ int toIndex = offset + length;
+ if (toIndex <= leftLength) {
+ return left.partialHash(h, offset, length);
+ } else if (offset >= leftLength) {
+ return right.partialHash(h, offset - leftLength, length);
+ } else {
+ int leftLength = this.leftLength - offset;
+ int leftPartial = left.partialHash(h, offset, leftLength);
+ return right.partialHash(leftPartial, 0, length - leftLength);
+ }
+ }
+
+ // =================================================================
+ // Input stream
+
+ @Override
+ public CodedInputStream newCodedInput() {
+ return CodedInputStream.newInstance(new RopeInputStream());
+ }
+
+ @Override
+ public InputStream newInput() {
+ return new RopeInputStream();
+ }
+
+ /**
+  * Implements the balancing algorithm of BAP95. Where the paper keeps track
+  * of pieces in an array, this implementation uses a stack. Subtrees are
+  * visited in left-to-right order, and the stack always holds the part of the
+  * string traversed so far.
+  *
+  * <p>One surprising aspect of the algorithm is that the result of balancing
+  * is not necessarily balanced, though it is nearly balanced. For details,
+  * see BAP95.
+  */
+ private static class Balancer {
+   // Holds the already-traversed prefix of the string as a stack of pieces.
+   // Concatenating the entries from bottom to top yields that prefix.
+   private final Deque<ByteString> prefixesStack =
+       new ArrayDeque<ByteString>(minLengthByDepth.length);
+
+   /**
+    * Balances the concatenation of {@code left} and {@code right}.
+    *
+    * @return a string equivalent to {@code left} followed by {@code right}
+    */
+   private ByteString balance(ByteString left, ByteString right) {
+     doBalance(left);
+     doBalance(right);
+
+     // Sweep the stack from top to bottom, hanging each entry to the left of
+     // what has been gathered so far. We should end up with a RopeByteString
+     // since at a minimum one is created from concatenating left and right.
+     ByteString gathered = prefixesStack.pop();
+     while (!prefixesStack.isEmpty()) {
+       gathered = new RopeByteString(prefixesStack.pop(), gathered);
+     }
+     return gathered;
+   }
+
+   /**
+    * Feeds the given tree to {@link #insert(ByteString)}, whole if it is
+    * balanced and otherwise child by child, left before right.
+    */
+   private void doBalance(ByteString root) {
+     // BAP95: Insert balanced subtrees whole. This means the result might not
+     // be balanced, leading to repeated rebalancings on concatenate. However,
+     // these rebalancings are shallow due to ignoring balanced subtrees, and
+     // relatively few calls to insert() result.
+     if (root.isBalanced()) {
+       insert(root);
+       return;
+     }
+     if (!(root instanceof RopeByteString)) {
+       throw new IllegalArgumentException(
+           "Has a new type of ByteString been created? Found " +
+           root.getClass());
+     }
+     final RopeByteString rope = (RopeByteString) root;
+     doBalance(rope.left);
+     doBalance(rope.right);
+   }
+
+   /**
+    * Push a string on the balance stack (BAP95). BAP95 uses an array and
+    * calls its elements 'bins'. A stack is used here instead, so the 'bins'
+    * of lengths are represented by differences between the elements of
+    * minLengthByDepth.
+    *
+    * <p>If the length bin for the string, and all shorter length bins, are
+    * empty, the string is simply pushed. Otherwise concatenation starts,
+    * with the given string placed in the "middle", and continues until the
+    * concatenation lands in an empty length bin matching its length.
+    *
+    * @param byteString string to place on the balance stack
+    */
+   private void insert(ByteString byteString) {
+     final int pieceBin = getDepthBinForLength(byteString.size());
+     final int pieceBinEnd = minLengthByDepth[pieceBin + 1];
+
+     // BAP95: Concatenate all trees occupying bins representing the length of
+     // our new piece or of shorter pieces, to the extent that is possible.
+     // The goal is to clear the bin which our piece belongs in, but that may
+     // not be entirely possible if there aren't enough longer bins occupied.
+     if (prefixesStack.isEmpty()
+         || prefixesStack.peek().size() >= pieceBinEnd) {
+       prefixesStack.push(byteString);
+       return;
+     }
+
+     final int pieceBinStart = minLengthByDepth[pieceBin];
+
+     // Concatenate the subtrees of shorter length
+     ByteString merged = prefixesStack.pop();
+     while (!prefixesStack.isEmpty()
+         && prefixesStack.peek().size() < pieceBinStart) {
+       merged = new RopeByteString(prefixesStack.pop(), merged);
+     }
+
+     // Concatenate the given string
+     merged = new RopeByteString(merged, byteString);
+
+     // Continue concatenating until we land in an empty bin
+     while (!prefixesStack.isEmpty()) {
+       final int mergedBinEnd =
+           minLengthByDepth[getDepthBinForLength(merged.size()) + 1];
+       if (prefixesStack.peek().size() >= mergedBinEnd) {
+         break;
+       }
+       merged = new RopeByteString(prefixesStack.pop(), merged);
+     }
+     prefixesStack.push(merged);
+   }
+
+   /**
+    * Returns the index into minLengthByDepth of the bin containing the given
+    * length: the index of an exact match, or otherwise one less than the
+    * binary-search insertion point.
+    */
+   private int getDepthBinForLength(int length) {
+     final int found = Arrays.binarySearch(minLengthByDepth, length);
+     if (found >= 0) {
+       return found;
+     }
+     // Not an exact match: binarySearch returned -(insertionPoint) - 1, and
+     // the containing bin is the one just before the insertion point.
+     final int insertionPoint = -(found + 1);
+     return insertionPoint - 1;
+   }
+ }
+
+ /**
+  * A continuable tree traversal. The state that a recursive traversal would
+  * keep on the call stack is kept instead on a stack of "Bread Crumbs". The
+  * maximum depth of that stack is the same as the depth of the tree being
+  * traversed.
+  *
+  * <p>This iterator is used to implement
+  * {@link RopeByteString#equalsFragments(ByteString)}.
+  */
+ private static class PieceIterator implements Iterator<LiteralByteString> {
+
+   private final Deque<RopeByteString> breadCrumbs =
+       new ArrayDeque<RopeByteString>(minLengthByDepth.length);
+   private LiteralByteString next;
+
+   private PieceIterator(ByteString root) {
+     next = getLeafByLeft(root);
+   }
+
+   /**
+    * Descends along left children from {@code root} until a non-rope node is
+    * reached, recording every rope node passed on the bread-crumb stack.
+    */
+   private LiteralByteString getLeafByLeft(ByteString root) {
+     ByteString cursor = root;
+     while (cursor instanceof RopeByteString) {
+       final RopeByteString rope = (RopeByteString) cursor;
+       breadCrumbs.push(rope);
+       cursor = rope.left;
+     }
+     return (LiteralByteString) cursor;
+   }
+
+   /**
+    * Finds the next leaf that is not empty, or returns {@code null} when the
+    * bread-crumb stack runs out.
+    */
+   private LiteralByteString getNextNonEmptyLeaf() {
+     // Almost always this loop body runs exactly once. However, if an empty
+     // string is discovered in the rope, it is tossed and we try again.
+     while (!breadCrumbs.isEmpty()) {
+       final LiteralByteString leaf = getLeafByLeft(breadCrumbs.pop().right);
+       if (!leaf.isEmpty()) {
+         return leaf;
+       }
+     }
+     return null;
+   }
+
+   public boolean hasNext() {
+     return next != null;
+   }
+
+   /**
+    * Returns the next item and advances one {@code LiteralByteString}.
+    *
+    * @return the next piece
+    * @throws NoSuchElementException if no piece remains
+    */
+   public LiteralByteString next() {
+     final LiteralByteString current = next;
+     if (current == null) {
+       throw new NoSuchElementException();
+     }
+     next = getNextNonEmptyLeaf();
+     return current;
+   }
+
+   public void remove() {
+     throw new UnsupportedOperationException();
+   }
+ }
+
+ // =================================================================
+ // ByteIterator
+
+ @Override
+ public ByteIterator iterator() {
+ return new RopeByteIterator();
+ }
+
+ private class RopeByteIterator implements ByteString.ByteIterator {
+
+ private final PieceIterator pieces;
+ private ByteIterator bytes;
+ int bytesRemaining;
+
+ private RopeByteIterator() {
+ pieces = new PieceIterator(RopeByteString.this);
+ bytes = pieces.next().iterator();
+ bytesRemaining = size();
+ }
+
+ public boolean hasNext() {
+ return (bytesRemaining > 0);
+ }
+
+ public Byte next() {
+ return nextByte(); // Does not instantiate a Byte
+ }
+
+ public byte nextByte() {
+ if (!bytes.hasNext()) {
+ bytes = pieces.next().iterator();
+ }
+ --bytesRemaining;
+ return bytes.nextByte();
+ }
+
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+ /**
+  * This class is the {@link RopeByteString} equivalent for
+  * {@link ByteArrayInputStream}.
+  */
+ private class RopeInputStream extends InputStream {
+   // Iterates through the pieces of the rope
+   private PieceIterator pieceIterator;
+   // The current piece
+   private LiteralByteString currentPiece;
+   // The size of the current piece
+   private int currentPieceSize;
+   // The index of the next byte to read in the current piece
+   private int currentPieceIndex;
+   // The offset of the start of the current piece in the rope byte string
+   private int currentPieceOffsetInRope;
+   // Offset in the buffer at which user called mark();
+   private int mark;
+
+   public RopeInputStream() {
+     initialize();
+   }
+
+   /**
+    * Reads up to {@code length} bytes into {@code b} starting at
+    * {@code offset}.
+    *
+    * @return the number of bytes read, or -1 if {@code length > 0} and the
+    *     end of the input had already been reached
+    * @throws NullPointerException if {@code b} is null
+    * @throws IndexOutOfBoundsException if {@code offset} or {@code length}
+    *     is negative, or the range does not fit in {@code b}
+    */
+   @Override
+   public int read(byte[] b, int offset, int length) {
+     if (b == null) {
+       throw new NullPointerException();
+     } else if (offset < 0 || length < 0 || length > b.length - offset) {
+       throw new IndexOutOfBoundsException();
+     }
+     return readSkipInternal(b, offset, length);
+   }
+
+   /**
+    * Skips up to {@code length} bytes (capped at
+    * {@code Integer.MAX_VALUE}).
+    *
+    * @return the number of bytes actually skipped; 0 once the end of the
+    *     input has been reached
+    * @throws IndexOutOfBoundsException if {@code length} is negative
+    */
+   @Override
+   public long skip(long length) {
+     if (length < 0) {
+       throw new IndexOutOfBoundsException();
+     } else if (length > Integer.MAX_VALUE) {
+       length = Integer.MAX_VALUE;
+     }
+     int skipped = readSkipInternal(null, 0, (int) length);
+     // readSkipInternal signals end-of-input with -1, which is the read()
+     // convention. skip() must report a count of bytes skipped, so map that
+     // marker to 0 instead of leaking a negative count to callers.
+     return Math.max(skipped, 0);
+   }
+
+   /**
+    * Internal implementation of read and skip. If b != null, then read the
+    * next {@code length} bytes into the buffer {@code b} at
+    * offset {@code offset}. If b == null, then skip the next {@code length}
+    * bytes.
+    * <p>
+    * This method assumes that all error checking has already happened.
+    * <p>
+    * Returns the actual number of bytes read or skipped, or -1 if
+    * {@code length > 0} and the end of the input was reached before anything
+    * could be read or skipped.
+    */
+   private int readSkipInternal(byte[] b, int offset, int length) {
+     int bytesRemaining = length;
+     while (bytesRemaining > 0) {
+       advanceIfCurrentPieceFullyRead();
+       if (currentPiece == null) {
+         if (bytesRemaining == length) {
+           // We didn't manage to read anything
+           return -1;
+         }
+         break;
+       } else {
+         // Copy the bytes from this piece.
+         int currentPieceRemaining = currentPieceSize - currentPieceIndex;
+         int count = Math.min(currentPieceRemaining, bytesRemaining);
+         if (b != null) {
+           currentPiece.copyTo(b, currentPieceIndex, offset, count);
+           offset += count;
+         }
+         currentPieceIndex += count;
+         bytesRemaining -= count;
+       }
+     }
+     // Return the number of bytes read.
+     return length - bytesRemaining;
+   }
+
+   /** Reads one byte as an unsigned value, or returns -1 at end of input. */
+   @Override
+   public int read() throws IOException {
+     advanceIfCurrentPieceFullyRead();
+     if (currentPiece == null) {
+       return -1;
+     } else {
+       return currentPiece.byteAt(currentPieceIndex++) & 0xFF;
+     }
+   }
+
+   /** Returns the rope's size minus the number of bytes consumed so far. */
+   @Override
+   public int available() throws IOException {
+     int bytesRead = currentPieceOffsetInRope + currentPieceIndex;
+     return RopeByteString.this.size() - bytesRead;
+   }
+
+   @Override
+   public boolean markSupported() {
+     return true;
+   }
+
+   /**
+    * Records the current position. {@code readAheadLimit} is not used.
+    */
+   @Override
+   public void mark(int readAheadLimit) {
+     // Set the mark to our position in the byte string
+     mark = currentPieceOffsetInRope + currentPieceIndex;
+   }
+
+   /**
+    * Returns to the marked position, which is the start of the rope if
+    * {@code mark} was never called.
+    */
+   @Override
+   public synchronized void reset() {
+     // Just reinitialize and skip the specified number of bytes.
+     initialize();
+     readSkipInternal(null, 0, mark);
+   }
+
+   /** Common initialization code used by both the constructor and reset() */
+   private void initialize() {
+     pieceIterator = new PieceIterator(RopeByteString.this);
+     currentPiece = pieceIterator.next();
+     currentPieceSize = currentPiece.size();
+     currentPieceIndex = 0;
+     currentPieceOffsetInRope = 0;
+   }
+
+   /**
+    * Skips to the next piece if we have read all the data in the current
+    * piece. Sets currentPiece to null if we have reached the end of the
+    * input.
+    */
+   private void advanceIfCurrentPieceFullyRead() {
+     if (currentPiece != null && currentPieceIndex == currentPieceSize) {
+       // A single step is enough: pieces obtained from pieceIterator after
+       // the first one are non-empty, so the piece advanced to here always
+       // has data left to read.
+       currentPieceOffsetInRope += currentPieceSize;
+       currentPieceIndex = 0;
+       if (pieceIterator.hasNext()) {
+         currentPiece = pieceIterator.next();
+         currentPieceSize = currentPiece.size();
+       } else {
+         currentPiece = null;
+         currentPieceSize = 0;
+       }
+     }
+   }
+ }
+}
diff --git a/java/src/main/java/com/google/protobuf/SingleFieldBuilder.java b/java/src/main/java/com/google/protobuf/SingleFieldBuilder.java
index d4475f66..4bfc9f34 100644
--- a/java/src/main/java/com/google/protobuf/SingleFieldBuilder.java
+++ b/java/src/main/java/com/google/protobuf/SingleFieldBuilder.java
@@ -31,21 +31,21 @@
package com.google.protobuf;
/**
- * <code>SingleFieldBuilder</code> implements a structure that a protocol
+ * {@code SingleFieldBuilder} implements a structure that a protocol
* message uses to hold a single field of another protocol message. It supports
* the classical use case of setting an immutable {@link Message} as the value
* of the field and is highly optimized around this.
* <br>
* It also supports the additional use case of setting a {@link Message.Builder}
- * as the field and deferring conversion of that <code>Builder</code>
- * to an immutable <code>Message</code>. In this way, it's possible to maintain
- * a tree of <code>Builder</code>'s that acts as a fully read/write data
+ * as the field and deferring conversion of that {@code Builder}
+ * to an immutable {@code Message}. In this way, it's possible to maintain
+ * a tree of {@code Builder}'s that acts as a fully read/write data
* structure.
* <br>
* Logically, one can think of a tree of builders as converting the entire tree
* to messages when build is called on the root or when any method is called
* that desires a Message instead of a Builder. In terms of the implementation,
- * the <code>SingleFieldBuilder</code> and <code>RepeatedFieldBuilder</code>
+ * the {@code SingleFieldBuilder} and {@code RepeatedFieldBuilder}
* classes cache messages that were created so that messages only need to be
* created when some change occured in its builder or a builder for one of its
* descendants.
@@ -99,7 +99,7 @@ public class SingleFieldBuilder
/**
* Get the message for the field. If the message is currently stored
- * as a <code>Builder</code>, it is converted to a <code>Message</code> by
+ * as a {@code Builder}, it is converted to a {@code Message} by
* calling {@link Message.Builder#buildPartial} on it. If no message has
* been set, returns the default instance of the message.
*
diff --git a/java/src/main/java/com/google/protobuf/SmallSortedMap.java b/java/src/main/java/com/google/protobuf/SmallSortedMap.java
index 1cf270f3..c6cad6af 100644
--- a/java/src/main/java/com/google/protobuf/SmallSortedMap.java
+++ b/java/src/main/java/com/google/protobuf/SmallSortedMap.java
@@ -51,14 +51,14 @@ import java.util.SortedMap;
* remaining entries are stored in an overflow map. Iteration over the entries
* in the map should be done as follows:
*
- * <pre>
- * for (int i = 0; i &lt; fieldMap.getNumArrayEntries(); i++) {
+ * <pre> {@code
+ * for (int i = 0; i < fieldMap.getNumArrayEntries(); i++) {
* process(fieldMap.getArrayEntryAt(i));
* }
- * for (Map.Entry&lt;K, V&gt; entry : fieldMap.getOverflowEntries()) {
+ * for (Map.Entry<K, V> entry : fieldMap.getOverflowEntries()) {
* process(entry);
* }
- * </pre>
+ * }</pre>
*
* The resulting iteration is in order of ascending field tag number. The
* object returned by {@link #entrySet()} adheres to the same contract but is
@@ -394,7 +394,7 @@ class SmallSortedMap<K extends Comparable<K>, V> extends AbstractMap<K, V> {
/**
* Entry implementation that implements Comparable in order to support
- * binary search witin the entry array. Also checks mutability in
+ * binary search within the entry array. Also checks mutability in
* {@link #setValue()}.
*/
private class Entry implements Map.Entry<K, V>, Comparable<Entry> {
diff --git a/java/src/main/java/com/google/protobuf/TextFormat.java b/java/src/main/java/com/google/protobuf/TextFormat.java
index d5fbdabf..ed462899 100644
--- a/java/src/main/java/com/google/protobuf/TextFormat.java
+++ b/java/src/main/java/com/google/protobuf/TextFormat.java
@@ -55,15 +55,18 @@ import java.util.regex.Pattern;
public final class TextFormat {
private TextFormat() {}
- private static final Printer DEFAULT_PRINTER = new Printer(false);
- private static final Printer SINGLE_LINE_PRINTER = new Printer(true);
+ private static final Printer DEFAULT_PRINTER = new Printer();
+ private static final Printer SINGLE_LINE_PRINTER =
+ (new Printer()).setSingleLineMode(true);
+ private static final Printer UNICODE_PRINTER =
+ (new Printer()).setEscapeNonAscii(false);
/**
* Outputs a textual representation of the Protocol Message supplied into
* the parameter output. (This representation is the new version of the
* classic "ProtocolPrinter" output from the original Protocol Buffer system)
*/
- public static void print(final Message message, final Appendable output)
+ public static void print(final MessageOrBuilder message, final Appendable output)
throws IOException {
DEFAULT_PRINTER.print(message, new TextGenerator(output));
}
@@ -79,7 +82,7 @@ public final class TextFormat {
* Generates a human readable form of this message, useful for debugging and
* other purposes, with no newline characters.
*/
- public static String shortDebugString(final Message message) {
+ public static String shortDebugString(final MessageOrBuilder message) {
try {
final StringBuilder sb = new StringBuilder();
SINGLE_LINE_PRINTER.print(message, new TextGenerator(sb));
@@ -109,7 +112,7 @@ public final class TextFormat {
* Like {@code print()}, but writes directly to a {@code String} and
* returns it.
*/
- public static String printToString(final Message message) {
+ public static String printToString(final MessageOrBuilder message) {
try {
final StringBuilder text = new StringBuilder();
print(message, text);
@@ -133,6 +136,34 @@ public final class TextFormat {
}
}
+ /**
+  * Same as {@code printToString()}, except that the text is produced by
+  * {@code UNICODE_PRINTER}, so non-ASCII characters in string type fields
+  * are not escaped as backslash+octal sequences.
+  */
+ public static String printToUnicodeString(final MessageOrBuilder message) {
+   final StringBuilder buffer = new StringBuilder();
+   try {
+     UNICODE_PRINTER.print(message, new TextGenerator(buffer));
+   } catch (IOException e) {
+     throw new IllegalStateException(e);
+   }
+   return buffer.toString();
+ }
+
+ /**
+  * Variant for unknown field sets: same as {@code printToString()}, except
+  * that the text is produced by {@code UNICODE_PRINTER}, so non-ASCII
+  * characters in string type fields are not escaped as backslash+octal
+  * sequences.
+  */
+ public static String printToUnicodeString(final UnknownFieldSet fields) {
+   final StringBuilder buffer = new StringBuilder();
+   try {
+     UNICODE_PRINTER.printUnknownFields(fields, new TextGenerator(buffer));
+   } catch (IOException e) {
+     throw new IllegalStateException(e);
+   }
+   return buffer.toString();
+ }
+
public static void printField(final FieldDescriptor field,
final Object value,
final Appendable output)
@@ -216,13 +247,26 @@ public final class TextFormat {
/** Helper class for converting protobufs to text. */
private static final class Printer {
/** Whether to omit newlines from the output. */
- final boolean singleLineMode;
+ boolean singleLineMode = false;
+
+ /** Whether to escape non ASCII characters with backslash and octal. */
+ boolean escapeNonAscii = true;
+
+ private Printer() {}
- private Printer(final boolean singleLineMode) {
+ /** Setter of singleLineMode */
+ private Printer setSingleLineMode(boolean singleLineMode) {
this.singleLineMode = singleLineMode;
+ return this;
+ }
+
+ /** Setter of escapeNonAscii */
+ private Printer setEscapeNonAscii(boolean escapeNonAscii) {
+ this.escapeNonAscii = escapeNonAscii;
+ return this;
}
- private void print(final Message message, final TextGenerator generator)
+ private void print(final MessageOrBuilder message, final TextGenerator generator)
throws IOException {
for (Map.Entry<FieldDescriptor, Object> field
: message.getAllFields().entrySet()) {
@@ -339,7 +383,9 @@ public final class TextFormat {
case STRING:
generator.print("\"");
- generator.print(escapeText((String) value));
+ generator.print(escapeNonAscii ?
+ escapeText((String) value) :
+ (String) value);
generator.print("\"");
break;
@@ -541,7 +587,7 @@ public final class TextFormat {
private int previousLine = 0;
private int previousColumn = 0;
- // We use possesive quantifiers (*+ and ++) because otherwise the Java
+ // We use possessive quantifiers (*+ and ++) because otherwise the Java
// regex matcher has stack overflows on large inputs.
private static final Pattern WHITESPACE =
Pattern.compile("(\\s|(#.*$))++", Pattern.MULTILINE);
@@ -864,7 +910,7 @@ public final class TextFormat {
public ParseException parseException(final String description) {
// Note: People generally prefer one-based line and column numbers.
return new ParseException(
- (line + 1) + ":" + (column + 1) + ": " + description);
+ line + 1, column + 1, description);
}
/**
@@ -875,7 +921,7 @@ public final class TextFormat {
final String description) {
// Note: People generally prefer one-based line and column numbers.
return new ParseException(
- (previousLine + 1) + ":" + (previousColumn + 1) + ": " + description);
+ previousLine + 1, previousColumn + 1, description);
}
/**
@@ -900,8 +946,45 @@ public final class TextFormat {
public static class ParseException extends IOException {
private static final long serialVersionUID = 3196188060225107702L;
+ private final int line;
+ private final int column;
+
+ /** Create a new instance, with -1 as the line and column numbers. */
public ParseException(final String message) {
- super(message);
+ this(-1, -1, message);
+ }
+
+ /**
+ * Create a new instance
+ *
+ * @param line the line number where the parse error occurred,
+ * using 1-offset.
+ * @param column the column number where the parse error occurred,
+ * using 1-offset.
+ */
+ public ParseException(final int line, final int column,
+ final String message) {
+ super(Integer.toString(line) + ":" + column + ": " + message);
+ this.line = line;
+ this.column = column;
+ }
+
+ /**
+ * Return the line where the parse exception occurred, or -1 when
+ * none is provided. The value is specified as 1-offset, so the first
+ * line is line 1.
+ */
+ public int getLine() {
+ return line;
+ }
+
+ /**
+ * Return the column where the parse exception occurred, or -1 when
+ * none is provided. The value is specified as 1-offset, so the first
+ * column is column 1.
+ */
+ public int getColumn() {
+ return column;
}
}
@@ -1073,7 +1156,7 @@ public final class TextFormat {
mergeField(tokenizer, extensionRegistry, subBuilder);
}
- value = subBuilder.build();
+ value = subBuilder.buildPartial();
} else {
tokenizer.consume(":");
@@ -1212,7 +1295,7 @@ public final class TextFormat {
*/
static ByteString unescapeBytes(final CharSequence charString)
throws InvalidEscapeSequenceException {
- // First convert the Java characater sequence to UTF-8 bytes.
+ // First convert the Java character sequence to UTF-8 bytes.
ByteString input = ByteString.copyFromUtf8(charString.toString());
// Then unescape certain byte sequences introduced by ASCII '\\'. The valid
// escapes can all be expressed with ASCII characters, so it is safe to
@@ -1349,7 +1432,7 @@ public final class TextFormat {
/**
* Parse a 32-bit signed integer from the text. Unlike the Java standard
* {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
- * and "0" to signify hexidecimal and octal numbers, respectively.
+ * and "0" to signify hexadecimal and octal numbers, respectively.
*/
static int parseInt32(final String text) throws NumberFormatException {
return (int) parseInteger(text, true, false);
@@ -1358,7 +1441,7 @@ public final class TextFormat {
/**
* Parse a 32-bit unsigned integer from the text. Unlike the Java standard
* {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
- * and "0" to signify hexidecimal and octal numbers, respectively. The
+ * and "0" to signify hexadecimal and octal numbers, respectively. The
* result is coerced to a (signed) {@code int} when returned since Java has
* no unsigned integer type.
*/
@@ -1369,7 +1452,7 @@ public final class TextFormat {
/**
* Parse a 64-bit signed integer from the text. Unlike the Java standard
* {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
- * and "0" to signify hexidecimal and octal numbers, respectively.
+ * and "0" to signify hexadecimal and octal numbers, respectively.
*/
static long parseInt64(final String text) throws NumberFormatException {
return parseInteger(text, true, true);
@@ -1378,7 +1461,7 @@ public final class TextFormat {
/**
* Parse a 64-bit unsigned integer from the text. Unlike the Java standard
* {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
- * and "0" to signify hexidecimal and octal numbers, respectively. The
+ * and "0" to signify hexadecimal and octal numbers, respectively. The
* result is coerced to a (signed) {@code long} when returned since Java has
* no unsigned long type.
*/
diff --git a/java/src/main/java/com/google/protobuf/UnknownFieldSet.java b/java/src/main/java/com/google/protobuf/UnknownFieldSet.java
index 26a15d00..45e2e6e4 100644
--- a/java/src/main/java/com/google/protobuf/UnknownFieldSet.java
+++ b/java/src/main/java/com/google/protobuf/UnknownFieldSet.java
@@ -46,7 +46,7 @@ import java.util.TreeMap;
* {@code UnknownFieldSet} is used to keep track of fields which were seen when
* parsing a protocol message but whose field numbers or types are unrecognized.
* This most frequently occurs when new fields are added to a message type
- * and then messages containing those feilds are read by old software that was
+ * and then messages containing those fields are read by old software that was
* compiled before the new types were added.
*
* <p>Every {@link Message} contains an {@code UnknownFieldSet} (and every
@@ -468,7 +468,7 @@ public final class UnknownFieldSet implements MessageLite {
/**
* Parse a single field from {@code input} and merge it into this set.
* @param tag The field's tag number, which was already parsed.
- * @return {@code false} if the tag is an engroup tag.
+ * @return {@code false} if the tag is an end group tag.
*/
public boolean mergeFieldFrom(final int tag, final CodedInputStream input)
throws IOException {
@@ -950,4 +950,29 @@ public final class UnknownFieldSet implements MessageLite {
}
}
}
+
+ /**
+ * Parser to implement MessageLite interface.
+ */
+ public static final class Parser extends AbstractParser<UnknownFieldSet> {
+ public UnknownFieldSet parsePartialFrom(
+ CodedInputStream input, ExtensionRegistryLite extensionRegistry)
+ throws InvalidProtocolBufferException {
+ Builder builder = newBuilder();
+ try {
+ builder.mergeFrom(input);
+ } catch (InvalidProtocolBufferException e) {
+ throw e.setUnfinishedMessage(builder.buildPartial());
+ } catch (IOException e) {
+ throw new InvalidProtocolBufferException(e.getMessage())
+ .setUnfinishedMessage(builder.buildPartial());
+ }
+ return builder.buildPartial();
+ }
+ }
+
+ private static final Parser PARSER = new Parser();
+ public final Parser getParserForType() {
+ return PARSER;
+ }
}
diff --git a/java/src/main/java/com/google/protobuf/UnmodifiableLazyStringList.java b/java/src/main/java/com/google/protobuf/UnmodifiableLazyStringList.java
index 83e5c796..f80f0968 100644
--- a/java/src/main/java/com/google/protobuf/UnmodifiableLazyStringList.java
+++ b/java/src/main/java/com/google/protobuf/UnmodifiableLazyStringList.java
@@ -32,6 +32,7 @@ package com.google.protobuf;
import java.util.AbstractList;
import java.util.RandomAccess;
+import java.util.List;
import java.util.ListIterator;
import java.util.Iterator;
@@ -143,4 +144,10 @@ public class UnmodifiableLazyStringList extends AbstractList<String>
}
};
}
+
+ @Override
+ public List<?> getUnderlyingElements() {
+ // The returned value is already unmodifiable.
+ return list.getUnderlyingElements();
+ }
}
diff --git a/java/src/main/java/com/google/protobuf/Utf8.java b/java/src/main/java/com/google/protobuf/Utf8.java
new file mode 100644
index 00000000..388f7fc5
--- /dev/null
+++ b/java/src/main/java/com/google/protobuf/Utf8.java
@@ -0,0 +1,349 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+/**
+ * A set of low-level, high-performance static utility methods related
+ * to the UTF-8 character encoding. This class has no dependencies
+ * outside of the core JDK libraries.
+ *
+ * <p>There are several variants of UTF-8. The one implemented by
+ * this class is the restricted definition of UTF-8 introduced in
+ * Unicode 3.1, which mandates the rejection of "overlong" byte
+ * sequences as well as rejection of 3-byte surrogate codepoint byte
+ * sequences. Note that the UTF-8 decoder included in Oracle's JDK
+ * has been modified to also reject "overlong" byte sequences, but (as
+ * of 2011) still accepts 3-byte surrogate codepoint byte sequences.
+ *
+ * <p>The byte sequences considered valid by this class are exactly
+ * those that can be roundtrip converted to Strings and back to bytes
+ * using the UTF-8 charset, without loss: <pre> {@code
+ * Arrays.equals(bytes, new String(bytes, "UTF-8").getBytes("UTF-8"))
+ * }</pre>
+ *
+ * <p>See the Unicode Standard,<br>
+ * Table 3-6. <em>UTF-8 Bit Distribution</em>,<br>
+ * Table 3-7. <em>Well Formed UTF-8 Byte Sequences</em>.
+ *
+ * <p>This class supports decoding of partial byte sequences, so that the
+ * bytes in a complete UTF-8 byte sequence can be stored in multiple
+ * segments. Methods typically return {@link #MALFORMED} if the partial
+ * byte sequence is definitely not well-formed, {@link #COMPLETE} if it is
+ * well-formed in the absence of additional input, or if the byte sequence
+ * apparently terminated in the middle of a character, an opaque integer
+ * "state" value containing enough information to decode the character when
+ * passed to a subsequent invocation of a partial decoding method.
+ *
+ * @author martinrb@google.com (Martin Buchholz)
+ */
+final class Utf8 {
+ private Utf8() {}
+
+ /**
+ * State value indicating that the byte sequence is well-formed and
+ * complete (no further bytes are needed to complete a character).
+ */
+ public static final int COMPLETE = 0;
+
+ /**
+ * State value indicating that the byte sequence is definitely not
+ * well-formed.
+ */
+ public static final int MALFORMED = -1;
+
+ // Other state values include the partial bytes of the incomplete
+ // character to be decoded in the simplest way: we pack the bytes
+ // into the state int in little-endian order. For example:
+ //
+ // int state = byte1 ^ (byte2 << 8) ^ (byte3 << 16);
+ //
+ // Such a state is unpacked thus (note the ~ operation for byte2 to
+ // undo byte1's sign-extension bits):
+ //
+ // int byte1 = (byte) state;
+ // int byte2 = (byte) ~(state >> 8);
+ // int byte3 = (byte) (state >> 16);
+ //
+ // We cannot store a zero byte in the state because it would be
+ // indistinguishable from the absence of a byte. But we don't need
+ // to, because partial bytes must always be negative. When building
+ // a state, we ensure that byte1 is negative and subsequent bytes
+ // are valid trailing bytes.
+
+ /**
+ * Returns {@code true} if the given byte array is a well-formed
+ * UTF-8 byte sequence.
+ *
+ * <p>This is a convenience method, equivalent to a call to {@code
+ * isValidUtf8(bytes, 0, bytes.length)}.
+ */
+ public static boolean isValidUtf8(byte[] bytes) {
+ return isValidUtf8(bytes, 0, bytes.length);
+ }
+
+ /**
+ * Returns {@code true} if the given byte array slice is a
+ * well-formed UTF-8 byte sequence. The range of bytes to be
+ * checked extends from index {@code index}, inclusive, to {@code
+ * limit}, exclusive.
+ *
+ * <p>This is a convenience method, equivalent to {@code
+ * partialIsValidUtf8(bytes, index, limit) == Utf8.COMPLETE}.
+ */
+ public static boolean isValidUtf8(byte[] bytes, int index, int limit) {
+ return partialIsValidUtf8(bytes, index, limit) == COMPLETE;
+ }
+
+ /**
+ * Tells whether the given byte array slice is a well-formed,
+ * malformed, or incomplete UTF-8 byte sequence. The range of bytes
+ * to be checked extends from index {@code index}, inclusive, to
+ * {@code limit}, exclusive.
+ *
+ * @param state either {@link Utf8#COMPLETE} (if this is the initial decoding
+ * operation) or the value returned from a call to a partial decoding method
+ * for the previous bytes
+ *
+ * @return {@link #MALFORMED} if the partial byte sequence is
+ * definitely not well-formed, {@link #COMPLETE} if it is well-formed
+ * (no additional input needed), or if the byte sequence is
+ * "incomplete", i.e. apparently terminated in the middle of a character,
+ * an opaque integer "state" value containing enough information to
+ * decode the character when passed to a subsequent invocation of a
+ * partial decoding method.
+ */
+ public static int partialIsValidUtf8(
+ int state, byte[] bytes, int index, int limit) {
+ if (state != COMPLETE) {
+ // The previous decoding operation was incomplete (or malformed).
+ // We look for a well-formed sequence consisting of bytes from
+ // the previous decoding operation (stored in state) together
+ // with bytes from the array slice.
+ //
+ // We expect such "straddler characters" to be rare.
+
+ if (index >= limit) { // No bytes? No progress.
+ return state;
+ }
+ int byte1 = (byte) state;
+ // byte1 is never ASCII.
+ if (byte1 < (byte) 0xE0) {
+ // two-byte form
+
+ // Simultaneously checks for illegal trailing-byte in
+ // leading position and overlong 2-byte form.
+ if (byte1 < (byte) 0xC2 ||
+ // byte2 trailing-byte test
+ bytes[index++] > (byte) 0xBF) {
+ return MALFORMED;
+ }
+ } else if (byte1 < (byte) 0xF0) {
+ // three-byte form
+
+ // Get byte2 from saved state or array
+ int byte2 = (byte) ~(state >> 8);
+ if (byte2 == 0) {
+ byte2 = bytes[index++];
+ if (index >= limit) {
+ return incompleteStateFor(byte1, byte2);
+ }
+ }
+ if (byte2 > (byte) 0xBF ||
+ // overlong? 5 most significant bits must not all be zero
+ (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0) ||
+ // illegal surrogate codepoint?
+ (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0) ||
+ // byte3 trailing-byte test
+ bytes[index++] > (byte) 0xBF) {
+ return MALFORMED;
+ }
+ } else {
+ // four-byte form
+
+ // Get byte2 and byte3 from saved state or array
+ int byte2 = (byte) ~(state >> 8);
+ int byte3 = 0;
+ if (byte2 == 0) {
+ byte2 = bytes[index++];
+ if (index >= limit) {
+ return incompleteStateFor(byte1, byte2);
+ }
+ } else {
+ byte3 = (byte) (state >> 16);
+ }
+ if (byte3 == 0) {
+ byte3 = bytes[index++];
+ if (index >= limit) {
+ return incompleteStateFor(byte1, byte2, byte3);
+ }
+ }
+
+ // If we were called with state == MALFORMED, then byte1 is 0xFF,
+ // which never occurs in well-formed UTF-8, and so we will return
+ // MALFORMED again below.
+
+ if (byte2 > (byte) 0xBF ||
+ // Check that 1 <= plane <= 16. Tricky optimized form of:
+ // if (byte1 > (byte) 0xF4 ||
+ // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
+ // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
+ (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0 ||
+ // byte3 trailing-byte test
+ byte3 > (byte) 0xBF ||
+ // byte4 trailing-byte test
+ bytes[index++] > (byte) 0xBF) {
+ return MALFORMED;
+ }
+ }
+ }
+
+ return partialIsValidUtf8(bytes, index, limit);
+ }
+
+ /**
+ * Tells whether the given byte array slice is a well-formed,
+ * malformed, or incomplete UTF-8 byte sequence. The range of bytes
+ * to be checked extends from index {@code index}, inclusive, to
+ * {@code limit}, exclusive.
+ *
+ * <p>This is a convenience method, equivalent to a call to {@code
+ * partialIsValidUtf8(Utf8.COMPLETE, bytes, index, limit)}.
+ *
+ * @return {@link #MALFORMED} if the partial byte sequence is
+ * definitely not well-formed, {@link #COMPLETE} if it is well-formed
+ * (no additional input needed), or if the byte sequence is
+ * "incomplete", i.e. apparently terminated in the middle of a character,
+ * an opaque integer "state" value containing enough information to
+ * decode the character when passed to a subsequent invocation of a
+ * partial decoding method.
+ */
+ public static int partialIsValidUtf8(
+ byte[] bytes, int index, int limit) {
+ // Optimize for 100% ASCII.
+ // Hotspot loves small simple top-level loops like this.
+ while (index < limit && bytes[index] >= 0) {
+ index++;
+ }
+
+ return (index >= limit) ? COMPLETE :
+ partialIsValidUtf8NonAscii(bytes, index, limit);
+ }
+
+ private static int partialIsValidUtf8NonAscii(
+ byte[] bytes, int index, int limit) {
+ for (;;) {
+ int byte1, byte2;
+
+ // Optimize for interior runs of ASCII bytes.
+ do {
+ if (index >= limit) {
+ return COMPLETE;
+ }
+ } while ((byte1 = bytes[index++]) >= 0);
+
+ if (byte1 < (byte) 0xE0) {
+ // two-byte form
+
+ if (index >= limit) {
+ return byte1;
+ }
+
+ // Simultaneously checks for illegal trailing-byte in
+ // leading position and overlong 2-byte form.
+ if (byte1 < (byte) 0xC2 ||
+ bytes[index++] > (byte) 0xBF) {
+ return MALFORMED;
+ }
+ } else if (byte1 < (byte) 0xF0) {
+ // three-byte form
+
+ if (index >= limit - 1) { // incomplete sequence
+ return incompleteStateFor(bytes, index, limit);
+ }
+ if ((byte2 = bytes[index++]) > (byte) 0xBF ||
+ // overlong? 5 most significant bits must not all be zero
+ (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0) ||
+ // check for illegal surrogate codepoints
+ (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0) ||
+ // byte3 trailing-byte test
+ bytes[index++] > (byte) 0xBF) {
+ return MALFORMED;
+ }
+ } else {
+ // four-byte form
+
+ if (index >= limit - 2) { // incomplete sequence
+ return incompleteStateFor(bytes, index, limit);
+ }
+ if ((byte2 = bytes[index++]) > (byte) 0xBF ||
+ // Check that 1 <= plane <= 16. Tricky optimized form of:
+ // if (byte1 > (byte) 0xF4 ||
+ // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
+ // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
+ (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0 ||
+ // byte3 trailing-byte test
+ bytes[index++] > (byte) 0xBF ||
+ // byte4 trailing-byte test
+ bytes[index++] > (byte) 0xBF) {
+ return MALFORMED;
+ }
+ }
+ }
+ }
+
+ private static int incompleteStateFor(int byte1) {
+ return (byte1 > (byte) 0xF4) ?
+ MALFORMED : byte1;
+ }
+
+ private static int incompleteStateFor(int byte1, int byte2) {
+ return (byte1 > (byte) 0xF4 ||
+ byte2 > (byte) 0xBF) ?
+ MALFORMED : byte1 ^ (byte2 << 8);
+ }
+
+ private static int incompleteStateFor(int byte1, int byte2, int byte3) {
+ return (byte1 > (byte) 0xF4 ||
+ byte2 > (byte) 0xBF ||
+ byte3 > (byte) 0xBF) ?
+ MALFORMED : byte1 ^ (byte2 << 8) ^ (byte3 << 16);
+ }
+
+ private static int incompleteStateFor(byte[] bytes, int index, int limit) {
+ int byte1 = bytes[index - 1];
+ switch (limit - index) {
+ case 0: return incompleteStateFor(byte1);
+ case 1: return incompleteStateFor(byte1, bytes[index]);
+ case 2: return incompleteStateFor(byte1, bytes[index], bytes[index + 1]);
+ default: throw new AssertionError();
+ }
+ }
+}
diff --git a/java/src/main/java/com/google/protobuf/WireFormat.java b/java/src/main/java/com/google/protobuf/WireFormat.java
index a30f2a3c..dd2d6310 100644
--- a/java/src/main/java/com/google/protobuf/WireFormat.java
+++ b/java/src/main/java/com/google/protobuf/WireFormat.java
@@ -146,7 +146,7 @@ public final class WireFormat {
public boolean isPackable() { return true; }
}
- // Field numbers for feilds in MessageSet wire format.
+ // Field numbers for fields in MessageSet wire format.
static final int MESSAGE_SET_ITEM = 1;
static final int MESSAGE_SET_TYPE_ID = 2;
static final int MESSAGE_SET_MESSAGE = 3;