Down-integrate from internal branch

author: xiaofeng@google.com <xiaofeng@google.com@630680e5-0e50-0410-840e-4b1c322b438d> 2012-09-22 02:40:50 +0000
committer: xiaofeng@google.com <xiaofeng@google.com@630680e5-0e50-0410-840e-4b1c322b438d> 2012-09-22 02:40:50 +0000
commit: b55a20fa2c669b181f47ea9219b8e74d1263da19 (patch)
tree: 3936a0e7c22196587a6d8397372de41434fe2129 /java/src/main
parent: 9ced30caf94bb4e7e9629c199679ff44e8ca7389 (diff)
download: protobuf-b55a20fa2c669b181f47ea9219b8e74d1263da19.tar.gz
protobuf-b55a20fa2c669b181f47ea9219b8e74d1263da19.tar.bz2
protobuf-b55a20fa2c669b181f47ea9219b8e74d1263da19.zip
33 files changed, 4519 insertions, 629 deletions
diff --git a/java/src/main/java/com/google/protobuf/AbstractMessage.java b/java/src/main/java/com/google/protobuf/AbstractMessage.java
index b9d83016..f4d115de 100644
--- a/java/src/main/java/com/google/protobuf/AbstractMessage.java
+++ b/java/src/main/java/com/google/protobuf/AbstractMessage.java
@@ -32,6 +32,7 @@ package com.google.protobuf;
 
 import com.google.protobuf.Descriptors.Descriptor;
 import com.google.protobuf.Descriptors.FieldDescriptor;
+import com.google.protobuf.GeneratedMessage.ExtendableBuilder;
 import com.google.protobuf.Internal.EnumLite;
 
 import java.io.IOException;
@@ -81,6 +82,25 @@ public abstract class AbstractMessage extends AbstractMessageLite
     return true;
   }
 
+  public List<String> findInitializationErrors() {
+    return Builder.findMissingFields(this);
+  }
+
+  public String getInitializationErrorString() {
+    return delimitWithCommas(findInitializationErrors());
+  }
+
+  private static String delimitWithCommas(List<String> parts) {
+    StringBuilder result = new StringBuilder();
+    for (String part : parts) {
+      if (result.length() > 0) {
+        result.append(", ");
+      }
+      result.append(part);
+    }
+    return result.toString();
+  }
+
   @Override
   public final String toString() {
     return TextFormat.printToString(this);
@@ -209,6 +229,15 @@ public abstract class AbstractMessage extends AbstractMessageLite
   }
 
   /**
+   * Package private helper method for AbstractParser to create
+   * UninitializedMessageException with missing field information.
+   */
+  @Override
+  UninitializedMessageException newUninitializedMessageException() {
+    return Builder.newUninitializedMessageException(this);
+  }
+
+  /**
    * Helper method for implementing {@link Message#hashCode()}.
    * <p>
    * This is needed because {@link java.lang.Enum#hashCode()} is final, but we
@@ -251,6 +280,14 @@ public abstract class AbstractMessage extends AbstractMessageLite
       return (BuilderType) this;
     }
 
+    public List<String> findInitializationErrors() {
+      return findMissingFields(this);
+    }
+
+    public String getInitializationErrorString() {
+      return delimitWithCommas(findInitializationErrors());
+    }
+
     public BuilderType mergeFrom(final Message other) {
       if (other.getDescriptorForType() != getDescriptorForType()) {
         throw new IllegalArgumentException(
@@ -314,7 +351,7 @@ public abstract class AbstractMessage extends AbstractMessageLite
         }
 
         if (!mergeFieldFrom(input, unknownFields, extensionRegistry,
-                            this, tag)) {
+                            getDescriptorForType(), this, null, tag)) {
           // end group tag
           break;
         }
@@ -323,25 +360,93 @@ public abstract class AbstractMessage extends AbstractMessageLite
       return (BuilderType) this;
     }
 
+    /** Adds to {@code builder} if non-null, otherwise to {@code extensions}. */
+    private static void addRepeatedField(
+        Message.Builder builder,
+        FieldSet<FieldDescriptor> extensions,
+        FieldDescriptor field,
+        Object value) {
+      if (builder != null) {
+        builder.addRepeatedField(field, value);
+      } else {
+        extensions.addRepeatedField(field, value);
+      }
+    }
+
+    /** Sets on {@code builder} if non-null, otherwise on {@code extensions}. */
+    private static void setField(
+        Message.Builder builder,
+        FieldSet<FieldDescriptor> extensions,
+        FieldDescriptor field,
+        Object value) {
+      if (builder != null) {
+        builder.setField(field, value);
+      } else {
+        extensions.setField(field, value);
+      }
+    }
+
+    /** Checks {@code builder} if non-null, otherwise {@code extensions}. */
+    private static boolean hasOriginalMessage(
+        Message.Builder builder,
+        FieldSet<FieldDescriptor> extensions,
+        FieldDescriptor field) {
+      if (builder != null) {
+        return builder.hasField(field);
+      } else {
+        return extensions.hasField(field);
+      }
+    }
+
+    /** Reads from {@code builder} if non-null, otherwise {@code extensions}. */
+    private static Message getOriginalMessage(
+        Message.Builder builder,
+        FieldSet<FieldDescriptor> extensions,
+        FieldDescriptor field) {
+      if (builder != null) {
+        return (Message) builder.getField(field);
+      } else {
+        return (Message) extensions.getField(field);
+      }
+    }
+
+    /** Merges the field's existing message, if any, into {@code subBuilder}. */
+    private static void mergeOriginalMessage(
+        Message.Builder builder,
+        FieldSet<FieldDescriptor> extensions,
+        FieldDescriptor field,
+        Message.Builder subBuilder) {
+      Message originalMessage = getOriginalMessage(builder, extensions, field);
+      if (originalMessage != null) {
+        subBuilder.mergeFrom(originalMessage);
+      }
+    }
+
     /**
-     * Like {@link #mergeFrom(CodedInputStream, UnknownFieldSet.Builder,
-     * ExtensionRegistryLite, Message.Builder)}, but parses a single field.
+     * Like {@link #mergeFrom(CodedInputStream, ExtensionRegistryLite)}, but
+     * parses a single field.
+     *
+     * When {@code builder} is not null, this method parses the field and
+     * merges it into {@code builder}. Otherwise (as when it is called by the
+     * parsing constructor in generated classes), it tries to parse the field
+     * into {@code extensions}.
+     *
      * Package-private because it is used by GeneratedMessage.ExtendableMessage.
      * @param tag The tag, which should have already been read.
      * @return {@code true} unless the tag is an end-group tag.
      */
     static boolean mergeFieldFrom(
-        final CodedInputStream input,
-        final UnknownFieldSet.Builder unknownFields,
-        final ExtensionRegistryLite extensionRegistry,
-        final Message.Builder builder,
-        final int tag) throws IOException {
-      final Descriptor type = builder.getDescriptorForType();
-
+        CodedInputStream input,
+        UnknownFieldSet.Builder unknownFields,
+        ExtensionRegistryLite extensionRegistry,
+        Descriptor type,
+        Message.Builder builder,
+        FieldSet<FieldDescriptor> extensions,
+        int tag) throws IOException {
       if (type.getOptions().getMessageSetWireFormat() &&
           tag == WireFormat.MESSAGE_SET_ITEM_TAG) {
         mergeMessageSetExtensionFromCodedStream(
-          input, unknownFields, extensionRegistry, builder);
+            input, unknownFields, extensionRegistry, type, builder, extensions);
         return true;
       }
 
@@ -376,8 +481,10 @@ public abstract class AbstractMessage extends AbstractMessageLite
         } else {
           field = null;
         }
-      } else {
+      } else if (builder != null) {
         field = type.findFieldByNumber(fieldNumber);
+      } else {
+        field = null;
       }
 
       boolean unknown = false;
@@ -413,13 +520,13 @@ public abstract class AbstractMessage extends AbstractMessageLite
               // enum, drop it (don't even add it to unknownFields).
               return true;
             }
-            builder.addRepeatedField(field, value);
+            addRepeatedField(builder, extensions, field, value);
           }
         } else {
           while (input.getBytesUntilLimit() > 0) {
             final Object value =
               FieldSet.readPrimitiveField(input, field.getLiteType());
-            builder.addRepeatedField(field, value);
+            addRepeatedField(builder, extensions, field, value);
           }
         }
         input.popLimit(limit);
@@ -434,10 +541,10 @@ public abstract class AbstractMessage extends AbstractMessageLite
               subBuilder = builder.newBuilderForField(field);
             }
             if (!field.isRepeated()) {
-              subBuilder.mergeFrom((Message) builder.getField(field));
+              mergeOriginalMessage(builder, extensions, field, subBuilder);
             }
             input.readGroup(field.getNumber(), subBuilder, extensionRegistry);
-            value = subBuilder.build();
+            value = subBuilder.buildPartial();
             break;
           }
           case MESSAGE: {
@@ -448,10 +555,10 @@ public abstract class AbstractMessage extends AbstractMessageLite
               subBuilder = builder.newBuilderForField(field);
             }
             if (!field.isRepeated()) {
-              subBuilder.mergeFrom((Message) builder.getField(field));
+              mergeOriginalMessage(builder, extensions, field, subBuilder);
             }
             input.readMessage(subBuilder, extensionRegistry);
-            value = subBuilder.build();
+            value = subBuilder.buildPartial();
             break;
           }
           case ENUM:
@@ -470,22 +577,28 @@ public abstract class AbstractMessage extends AbstractMessageLite
         }
 
         if (field.isRepeated()) {
-          builder.addRepeatedField(field, value);
+          addRepeatedField(builder, extensions, field, value);
         } else {
-          builder.setField(field, value);
+          setField(builder, extensions, field, value);
         }
       }
 
       return true;
     }
 
-    /** Called by {@code #mergeFieldFrom()} to parse a MessageSet extension. */
+    /**
+     * Called by {@code #mergeFieldFrom()} to parse a MessageSet extension.
+     * If {@code builder} is not null, this method will merge MessageSet into
+     * the builder.  Otherwise, it will merge the MessageSet into {@code
+     * extensions}.
+     */
     private static void mergeMessageSetExtensionFromCodedStream(
-        final CodedInputStream input,
-        final UnknownFieldSet.Builder unknownFields,
-        final ExtensionRegistryLite extensionRegistry,
-        final Message.Builder builder) throws IOException {
-      final Descriptor type = builder.getDescriptorForType();
+        CodedInputStream input,
+        UnknownFieldSet.Builder unknownFields,
+        ExtensionRegistryLite extensionRegistry,
+        Descriptor type,
+        Message.Builder builder,
+        FieldSet<FieldDescriptor> extensions) throws IOException {
 
       // The wire format for MessageSet is:
       //   message MessageSet {
@@ -504,10 +617,11 @@ public abstract class AbstractMessage extends AbstractMessageLite
       // should be prepared to accept them.
 
       int typeId = 0;
-      ByteString rawBytes = null;  // If we encounter "message" before "typeId"
-      Message.Builder subBuilder = null;
-      FieldDescriptor field = null;
+      ByteString rawBytes = null; // If we encounter "message" before "typeId"
+      ExtensionRegistry.ExtensionInfo extension = null;
 
+      // Read bytes from input. If we get its type first, parse it eagerly;
+      // otherwise store the raw bytes in a local variable.
       while (true) {
         final int tag = input.readTag();
         if (tag == 0) {
@@ -516,75 +630,121 @@ public abstract class AbstractMessage extends AbstractMessageLite
 
         if (tag == WireFormat.MESSAGE_SET_TYPE_ID_TAG) {
           typeId = input.readUInt32();
-          // Zero is not a valid type ID.
           if (typeId != 0) {
-            final ExtensionRegistry.ExtensionInfo extension;
-
             // extensionRegistry may be either ExtensionRegistry or
-            // ExtensionRegistryLite.  Since the type we are parsing is a full
+            // ExtensionRegistryLite. Since the type we are parsing is a full
             // message, only a full ExtensionRegistry could possibly contain
-            // extensions of it.  Otherwise we will treat the registry as if it
+            // extensions of it. Otherwise we will treat the registry as if it
             // were empty.
             if (extensionRegistry instanceof ExtensionRegistry) {
               extension = ((ExtensionRegistry) extensionRegistry)
                   .findExtensionByNumber(type, typeId);
-            } else {
-              extension = null;
-            }
-
-            if (extension != null) {
-              field = extension.descriptor;
-              subBuilder = extension.defaultInstance.newBuilderForType();
-              final Message originalMessage = (Message)builder.getField(field);
-              if (originalMessage != null) {
-                subBuilder.mergeFrom(originalMessage);
-              }
-              if (rawBytes != null) {
-                // We already encountered the message.  Parse it now.
-                subBuilder.mergeFrom(
-                  CodedInputStream.newInstance(rawBytes.newInput()));
-                rawBytes = null;
-              }
-            } else {
-              // Unknown extension number.  If we already saw data, put it
-              // in rawBytes.
-              if (rawBytes != null) {
-                unknownFields.mergeField(typeId,
-                  UnknownFieldSet.Field.newBuilder()
-                    .addLengthDelimited(rawBytes)
-                    .build());
-                rawBytes = null;
-              }
             }
           }
+
         } else if (tag == WireFormat.MESSAGE_SET_MESSAGE_TAG) {
-          if (typeId == 0) {
-            // We haven't seen a type ID yet, so we have to store the raw bytes
-            // for now.
-            rawBytes = input.readBytes();
-          } else if (subBuilder == null) {
-            // We don't know how to parse this.  Ignore it.
-            unknownFields.mergeField(typeId,
-              UnknownFieldSet.Field.newBuilder()
-                .addLengthDelimited(input.readBytes())
-                .build());
-          } else {
-            // We already know the type, so we can parse directly from the input
-            // with no copying.  Hooray!
-            input.readMessage(subBuilder, extensionRegistry);
+          if (typeId != 0) {
+            if (extension != null && ExtensionRegistryLite.isEagerlyParseMessageSets()) {
+              // We already know the type, so we can parse directly from the
+              // input with no copying.  Hooray!
+              eagerlyMergeMessageSetExtension(
+                  input, extension, extensionRegistry, builder, extensions);
+              rawBytes = null;
+              continue;
+            }
           }
-        } else {
-          // Unknown tag.  Skip it.
+          // We haven't seen a type ID yet, or we want to parse the message lazily.
+          rawBytes = input.readBytes();
+
+        } else { // Unknown tag. Skip it.
           if (!input.skipField(tag)) {
-            break;  // end of group
+            break; // End of group
           }
         }
       }
-
       input.checkLastTagWas(WireFormat.MESSAGE_SET_ITEM_END_TAG);
 
-      if (subBuilder != null) {
-        builder.setField(field, subBuilder.build());
+      // Process the raw bytes.
+      if (rawBytes != null && typeId != 0) { // Zero is not a valid type ID.
+        if (extension != null) { // We know the type
+          mergeMessageSetExtensionFromBytes(
+              rawBytes, extension, extensionRegistry, builder, extensions);
+        } else { // We don't know how to parse this. Ignore it.
+          if (rawBytes != null) {
+            unknownFields.mergeField(typeId, UnknownFieldSet.Field.newBuilder()
+                .addLengthDelimited(rawBytes).build());
+          }
+        }
+      }
+    }
+
+    private static void eagerlyMergeMessageSetExtension(
+        CodedInputStream input,
+        ExtensionRegistry.ExtensionInfo extension,
+        ExtensionRegistryLite extensionRegistry,
+        Message.Builder builder,
+        FieldSet<FieldDescriptor> extensions) throws IOException {
+
+      FieldDescriptor field = extension.descriptor;
+      Message value = null;
+      if (hasOriginalMessage(builder, extensions, field)) {
+        Message originalMessage =
+            getOriginalMessage(builder, extensions, field);
+        Message.Builder subBuilder = originalMessage.toBuilder();
+        input.readMessage(subBuilder, extensionRegistry);
+        value = subBuilder.buildPartial();
+      } else {
+        value = input.readMessage(extension.defaultInstance.getParserForType(),
+          extensionRegistry);
+      }
+
+      if (builder != null) {
+        builder.setField(field, value);
+      } else {
+        extensions.setField(field, value);
+      }
+    }
+
+    private static void mergeMessageSetExtensionFromBytes(
+        ByteString rawBytes,
+        ExtensionRegistry.ExtensionInfo extension,
+        ExtensionRegistryLite extensionRegistry,
+        Message.Builder builder,
+        FieldSet<FieldDescriptor> extensions) throws IOException {
+
+      FieldDescriptor field = extension.descriptor;
+      boolean hasOriginalValue = hasOriginalMessage(builder, extensions, field);
+
+      if (hasOriginalValue || ExtensionRegistryLite.isEagerlyParseMessageSets()) {
+        // If the field already exists, we just parse the field.
+        Message value = null;
+        if (hasOriginalValue) {
+          Message originalMessage =
+              getOriginalMessage(builder, extensions, field);
+          Message.Builder subBuilder= originalMessage.toBuilder();
+          subBuilder.mergeFrom(rawBytes, extensionRegistry);
+          value = subBuilder.buildPartial();
+        } else {
+          value = extension.defaultInstance.getParserForType()
+              .parsePartialFrom(rawBytes, extensionRegistry);
+        }
+        setField(builder, extensions, field, value);
+      } else {
+        // Use LazyField to load MessageSet lazily.
+        LazyField lazyField = new LazyField(
+            extension.defaultInstance, extensionRegistry, rawBytes);
+        if (builder != null) {
+          // TODO(xiangl): it looks like this method can only be invoked by
+          // ExtendableBuilder, but I'm not sure. So I double check the type of
+          // builder here. It may be useless and need more investigation.
+          if (builder instanceof ExtendableBuilder) {
+            builder.setField(field, lazyField);
+          } else {
+            builder.setField(field, lazyField.getValue());
+          }
+        } else {
+          extensions.setField(field, lazyField);
+        }
       }
     }
 
@@ -596,6 +756,11 @@ public abstract class AbstractMessage extends AbstractMessageLite
       return (BuilderType) this;
     }
 
+    public Message.Builder getFieldBuilder(final FieldDescriptor field) {
+      throw new UnsupportedOperationException(
+          "getFieldBuilder() called on an unsupported message type.");
+    }
+
     /**
      * Construct an UninitializedMessageException reporting missing fields in
      * the given message.
@@ -609,14 +774,15 @@ public abstract class AbstractMessage extends AbstractMessageLite
      * Populates {@code this.missingFields} with the full "path" of each
      * missing required field in the given message.
      */
-    private static List<String> findMissingFields(final Message message) {
+    private static List<String> findMissingFields(
+        final MessageOrBuilder message) {
       final List<String> results = new ArrayList<String>();
       findMissingFields(message, "", results);
       return results;
     }
 
     /** Recursive helper implementing {@link #findMissingFields(Message)}. */
-    private static void findMissingFields(final Message message,
+    private static void findMissingFields(final MessageOrBuilder message,
                                           final String prefix,
                                           final List<String> results) {
       for (final FieldDescriptor field :
@@ -635,13 +801,13 @@ public abstract class AbstractMessage extends AbstractMessageLite
           if (field.isRepeated()) {
             int i = 0;
             for (final Object element : (List) value) {
-              findMissingFields((Message) element,
+              findMissingFields((MessageOrBuilder) element,
                                 subMessagePrefix(prefix, field, i++),
                                 results);
             }
           } else {
             if (message.hasField(field)) {
-              findMissingFields((Message) value,
+              findMissingFields((MessageOrBuilder) value,
                                 subMessagePrefix(prefix, field, -1),
                                 results);
             }
diff --git a/java/src/main/java/com/google/protobuf/AbstractMessageLite.java b/java/src/main/java/com/google/protobuf/AbstractMessageLite.java
index 77b27370..9926f3db 100644
--- a/java/src/main/java/com/google/protobuf/AbstractMessageLite.java
+++ b/java/src/main/java/com/google/protobuf/AbstractMessageLite.java
@@ -92,6 +92,14 @@ public abstract class AbstractMessageLite implements MessageLite {
   }
 
   /**
+   * Package private helper method for AbstractParser to create
+   * UninitializedMessageException.
+   */
+  UninitializedMessageException newUninitializedMessageException() {
+    return new UninitializedMessageException(this);
+  }
+
+  /**
    * A partial implementation of the {@link Message.Builder} interface which
    * implements as many methods of that interface as possible in terms of
    * other methods.
@@ -307,10 +315,12 @@ public abstract class AbstractMessageLite implements MessageLite {
      */
     protected static <T> void addAll(final Iterable<T> values,
                                      final Collection<? super T> list) {
-      for (final T value : values) {
-        if (value == null) {
-          throw new NullPointerException();
-        }
+      if (values instanceof LazyStringList) {
+        // For LazyStringLists, check the underlying elements to avoid
+        // forcing conversions of ByteStrings to Strings.
+        checkForNullValues(((LazyStringList) values).getUnderlyingElements());
+      } else {
+        checkForNullValues(values);
       }
       if (values instanceof Collection) {
         final Collection<T> collection = (Collection<T>) values;
@@ -321,5 +331,13 @@ public abstract class AbstractMessageLite implements MessageLite {
         }
       }
     }
+
+    private static void checkForNullValues(final Iterable<?> values) {
+      for (final Object value : values) {
+        if (value == null) {
+          throw new NullPointerException();
+        }
+      }
+    }
   }
 }
diff --git a/java/src/main/java/com/google/protobuf/AbstractParser.java b/java/src/main/java/com/google/protobuf/AbstractParser.java
new file mode 100644
index 00000000..9bd9d397
--- /dev/null
+++ b/java/src/main/java/com/google/protobuf/AbstractParser.java
@@ -0,0 +1,261 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import com.google.protobuf.AbstractMessageLite.Builder.LimitedInputStream;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * A partial implementation of the {@link Parser} interface which implements
+ * as many methods of that interface as possible in terms of other methods.
+ *
+ * Note: This class implements all the convenience methods in the
+ * {@link Parser} interface. See {@link Parser} for related javadocs.
+ * Subclasses need to implement
+ * {@link Parser#parsePartialFrom(CodedInputStream, ExtensionRegistryLite)}
+ *
+ * @author liujisi@google.com (Pherl Liu)
+ */
+public abstract class AbstractParser<MessageType extends MessageLite>
+    implements Parser<MessageType> {
+  /**
+   * Creates an UninitializedMessageException for MessageType.
+   */
+  private UninitializedMessageException
+      newUninitializedMessageException(MessageType message) {
+    if (message instanceof AbstractMessageLite) {
+      return ((AbstractMessageLite) message).newUninitializedMessageException();
+    }
+    return new UninitializedMessageException(message);
+  }
+
+  /**
+   * Helper method to check if message is initialized.
+   *
+   * @throws InvalidProtocolBufferException if it is not initialized.
+   * @return The message to check.
+   */
+  private MessageType checkMessageInitialized(MessageType message)
+      throws InvalidProtocolBufferException {
+    if (message != null && !message.isInitialized()) {
+      throw newUninitializedMessageException(message)
+          .asInvalidProtocolBufferException()
+          .setUnfinishedMessage(message);
+    }
+    return message;
+  }
+
+  private static final ExtensionRegistryLite EMPTY_REGISTRY
+      = ExtensionRegistryLite.getEmptyRegistry();
+
+  public MessageType parsePartialFrom(CodedInputStream input)
+      throws InvalidProtocolBufferException {
+    return parsePartialFrom(input, EMPTY_REGISTRY);
+  }
+
+  public MessageType parseFrom(CodedInputStream input,
+                               ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException {
+    return checkMessageInitialized(
+        parsePartialFrom(input, extensionRegistry));
+  }
+
+  public MessageType parseFrom(CodedInputStream input)
+      throws InvalidProtocolBufferException {
+    return parseFrom(input, EMPTY_REGISTRY);
+  }
+
+  public MessageType parsePartialFrom(ByteString data,
+                                      ExtensionRegistryLite extensionRegistry)
+    throws InvalidProtocolBufferException {
+    MessageType message;
+    try {
+      CodedInputStream input = data.newCodedInput();
+      message = parsePartialFrom(input, extensionRegistry);
+      try {
+        input.checkLastTagWas(0);
+      } catch (InvalidProtocolBufferException e) {
+        throw e.setUnfinishedMessage(message);
+      }
+      return message;
+    } catch (InvalidProtocolBufferException e) {
+      throw e;
+    } catch (IOException e) {
+      throw new RuntimeException(
+          "Reading from a ByteString threw an IOException (should " +
+          "never happen).", e);
+    }
+  }
+
+  public MessageType parsePartialFrom(ByteString data)
+      throws InvalidProtocolBufferException {
+    return parsePartialFrom(data, EMPTY_REGISTRY);
+  }
+
+  public MessageType parseFrom(ByteString data,
+                               ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException {
+    return checkMessageInitialized(parsePartialFrom(data, extensionRegistry));
+  }
+
+  public MessageType parseFrom(ByteString data)
+      throws InvalidProtocolBufferException {
+    return parseFrom(data, EMPTY_REGISTRY);
+  }
+
+  public MessageType parsePartialFrom(byte[] data, int off, int len,
+                                      ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException {
+    try {
+      CodedInputStream input = CodedInputStream.newInstance(data, off, len);
+      MessageType message = parsePartialFrom(input, extensionRegistry);
+      try {
+        input.checkLastTagWas(0);
+      } catch (InvalidProtocolBufferException e) {
+        throw e.setUnfinishedMessage(message);
+      }
+      return message;
+    } catch (InvalidProtocolBufferException e) {
+      throw e;
+    } catch (IOException e) {
+      throw new RuntimeException(
+          "Reading from a byte array threw an IOException (should " +
+          "never happen).", e);
+    }
+  }
+
+  public MessageType parsePartialFrom(byte[] data, int off, int len)
+      throws InvalidProtocolBufferException {
+    return parsePartialFrom(data, off, len, EMPTY_REGISTRY);
+  }
+
+  public MessageType parsePartialFrom(byte[] data,
+                                      ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException {
+    return parsePartialFrom(data, 0, data.length, extensionRegistry);
+  }
+
+  public MessageType parsePartialFrom(byte[] data)
+      throws InvalidProtocolBufferException {
+    return parsePartialFrom(data, 0, data.length, EMPTY_REGISTRY);
+  }
+
+  public MessageType parseFrom(byte[] data, int off, int len,
+                               ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException {
+    return checkMessageInitialized(
+        parsePartialFrom(data, off, len, extensionRegistry));
+  }
+
+  public MessageType parseFrom(byte[] data, int off, int len)
+      throws InvalidProtocolBufferException {
+    return parseFrom(data, off, len, EMPTY_REGISTRY);
+  }
+
+  public MessageType parseFrom(byte[] data,
+                               ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException {
+    return parseFrom(data, 0, data.length, extensionRegistry);
+  }
+
+  public MessageType parseFrom(byte[] data)
+      throws InvalidProtocolBufferException {
+    return parseFrom(data, EMPTY_REGISTRY);
+  }
+
+  public MessageType parsePartialFrom(InputStream input,
+                                      ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException {
+    CodedInputStream codedInput = CodedInputStream.newInstance(input);
+    MessageType message = parsePartialFrom(codedInput, extensionRegistry);
+    try {
+      codedInput.checkLastTagWas(0);
+    } catch (InvalidProtocolBufferException e) {
+      throw e.setUnfinishedMessage(message);
+    }
+    return message;
+  }
+
+  public MessageType parsePartialFrom(InputStream input)
+      throws InvalidProtocolBufferException {
+    return parsePartialFrom(input, EMPTY_REGISTRY);
+  }
+
+  public MessageType parseFrom(InputStream input,
+                               ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException {
+    return checkMessageInitialized(
+        parsePartialFrom(input, extensionRegistry));
+  }
+
+  public MessageType parseFrom(InputStream input)
+      throws InvalidProtocolBufferException {
+    return parseFrom(input, EMPTY_REGISTRY);
+  }
+
+  public MessageType parsePartialDelimitedFrom(
+      InputStream input,
+      ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException {
+    int size;
+    try {
+      int firstByte = input.read();
+      if (firstByte == -1) {
+        return null;
+      }
+      size = CodedInputStream.readRawVarint32(firstByte, input);
+    } catch (IOException e) {
+      throw new InvalidProtocolBufferException(e.getMessage());
+    }
+    InputStream limitedInput = new LimitedInputStream(input, size);
+    return parsePartialFrom(limitedInput, extensionRegistry);
+  }
+
+  public MessageType parsePartialDelimitedFrom(InputStream input)
+      throws InvalidProtocolBufferException {
+    return parsePartialDelimitedFrom(input, EMPTY_REGISTRY);
+  }
+
+  public MessageType parseDelimitedFrom(
+      InputStream input,
+      ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException {
+    return checkMessageInitialized(
+        parsePartialDelimitedFrom(input, extensionRegistry));
+  }
+
+  public MessageType parseDelimitedFrom(InputStream input)
+      throws InvalidProtocolBufferException {
+    return parseDelimitedFrom(input, EMPTY_REGISTRY);
+  }
+}
diff --git a/java/src/main/java/com/google/protobuf/BoundedByteString.java b/java/src/main/java/com/google/protobuf/BoundedByteString.java
new file mode 100644
index 00000000..cd4982c3
--- /dev/null
+++ b/java/src/main/java/com/google/protobuf/BoundedByteString.java
@@ -0,0 +1,163 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import java.util.NoSuchElementException;
+
+/**
+ * This class is used to represent the substring of a {@link ByteString} over a
+ * single byte array. In terms of the public API of {@link ByteString}, you end
+ * up here by calling {@link ByteString#copyFrom(byte[])} followed by {@link
+ * ByteString#substring(int, int)}.
+ *
+ * <p>This class contains most of the overhead involved in creating a substring
+ * from a {@link LiteralByteString}.  The overhead involves some range-checking
+ * and two extra fields.
+ *
+ * @author carlanton@google.com (Carl Haverl)
+ */
+class BoundedByteString extends LiteralByteString {
+
+  private final int bytesOffset;
+  private final int bytesLength;
+
+  /**
+   * Creates a {@code BoundedByteString} backed by the sub-range of given array,
+   * without copying.
+   *
+   * @param bytes  array to wrap
+   * @param offset index to first byte to use in bytes
+   * @param length number of bytes to use from bytes
+   * @throws IllegalArgumentException if {@code offset < 0}, {@code length < 0},
+   *                                  or if {@code offset + length >
+   *                                  bytes.length}.
+   */
+  BoundedByteString(byte[] bytes, int offset, int length) {
+    super(bytes);
+    if (offset < 0) {
+      throw new IllegalArgumentException("Offset too small: " + offset);
+    }
+    if (length < 0) {
+      throw new IllegalArgumentException("Length too small: " + length);
+    }
+    if ((long) offset + length > bytes.length) {  // long: no int overflow
+      throw new IllegalArgumentException(
+          "Offset+Length too large: " + offset + "+" + length);
+    }
+
+    this.bytesOffset = offset;
+    this.bytesLength = length;
+  }
+
+  /**
+   * Gets the byte at the given index.
+   * Throws {@link ArrayIndexOutOfBoundsException}
+   * for backwards-compatibility reasons although it would more properly be
+   * {@link IndexOutOfBoundsException}.
+   *
+   * @param index index of byte
+   * @return the value
+   * @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size
+   */
+  @Override
+  public byte byteAt(int index) {
+    // We must check the index ourselves as we cannot rely on Java array index
+    // checking for substrings.
+    if (index < 0) {
+      throw new ArrayIndexOutOfBoundsException("Index too small: " + index);
+    }
+    if (index >= size()) {
+      throw new ArrayIndexOutOfBoundsException(
+          "Index too large: " + index + ", " + size());
+    }
+
+    return bytes[bytesOffset + index];
+  }
+
+  @Override
+  public int size() {
+    return bytesLength;
+  }
+
+  @Override
+  protected int getOffsetIntoBytes() {
+    return bytesOffset;
+  }
+
+  // =================================================================
+  // ByteString -> byte[]
+
+  @Override
+  protected void copyToInternal(byte[] target, int sourceOffset, 
+      int targetOffset, int numberToCopy) {
+    System.arraycopy(bytes, getOffsetIntoBytes() + sourceOffset, target,
+        targetOffset, numberToCopy);
+  }
+
+  // =================================================================
+  // ByteIterator
+
+  @Override
+  public ByteIterator iterator() {
+    return new BoundedByteIterator();
+  }
+
+  private class BoundedByteIterator implements ByteIterator {
+
+    private int position;
+    private final int limit;
+
+    private BoundedByteIterator() {
+      position = getOffsetIntoBytes();
+      limit = position + size();
+    }
+
+    public boolean hasNext() {
+      return (position < limit);
+    }
+
+    public Byte next() {
+      // Boxing calls Byte.valueOf(byte), which does not instantiate.
+      return nextByte();
+    }
+
+    public byte nextByte() {
+      if (position >= limit) {
+        throw new NoSuchElementException();
+      }
+      return bytes[position++];
+    }
+
+    public void remove() {
+      throw new UnsupportedOperationException();
+    }
+  }
+}
diff --git a/java/src/main/java/com/google/protobuf/ByteString.java b/java/src/main/java/com/google/protobuf/ByteString.java
index 91356357..1b18169e 100644
--- a/java/src/main/java/com/google/protobuf/ByteString.java
+++ b/java/src/main/java/com/google/protobuf/ByteString.java
@@ -30,140 +30,413 @@
 
 package com.google.protobuf;
 
-import java.io.InputStream;
-import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
-import java.io.FilterOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
 import java.io.UnsupportedEncodingException;
 import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Iterator;
 import java.util.List;
+import java.util.NoSuchElementException;
 
 /**
- * Immutable array of bytes.
+ * Immutable sequence of bytes.  Substring is supported by sharing the reference
+ * to the immutable underlying bytes, as with {@link String}.  Concatenation is
+ * likewise supported without copying (long strings) by building a tree of
+ * pieces in {@link RopeByteString}.
+ * <p>
+ * Like {@link String}, the contents of a {@link ByteString} can never be
+ * observed to change, not even in the presence of a data race or incorrect
+ * API usage in the client code.
  *
  * @author crazybob@google.com Bob Lee
  * @author kenton@google.com Kenton Varda
+ * @author carlanton@google.com Carl Haverl
+ * @author martinrb@google.com Martin Buchholz
  */
-public final class ByteString {
-  private final byte[] bytes;
+public abstract class ByteString implements Iterable<Byte> {
 
-  private ByteString(final byte[] bytes) {
-    this.bytes = bytes;
-  }
+  /**
+   * When two strings to be concatenated have a combined length shorter than
+   * this, we just copy their bytes on {@link #concat(ByteString)}.
+   * The trade-off is copy size versus the overhead of creating tree nodes
+   * in {@link RopeByteString}.
+   */
+  static final int CONCATENATE_BY_COPY_SIZE = 128;
+
+  /**
+   * When copying an InputStream into a ByteString with .readFrom(),
+   * the chunks in the underlying rope start at 256 bytes, but double
+   * each iteration up to 8192 bytes.
+   */
+  static final int MIN_READ_FROM_CHUNK_SIZE = 0x100;  // 256b
+  static final int MAX_READ_FROM_CHUNK_SIZE = 0x2000;  // 8k
 
   /**
-   * Gets the byte at the given index.
+   * Empty {@code ByteString}.
+   */
+  public static final ByteString EMPTY = new LiteralByteString(new byte[0]);
+
+  // This constructor is here to prevent subclassing outside of this package.
+  ByteString() {}
+
+  /**
+   * Gets the byte at the given index. This method should be used only for
+   * random access to individual bytes. To access bytes sequentially, use the
+   * {@link ByteIterator} returned by {@link #iterator()}, and call {@link
+   * #substring(int, int)} first if necessary.
    *
+   * @param index index of byte
+   * @return the value
    * @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size
    */
-  public byte byteAt(final int index) {
-    return bytes[index];
+  public abstract byte byteAt(int index);
+
+  /**
+   * Return a {@link ByteString.ByteIterator} over the bytes in the ByteString.
+   * To avoid auto-boxing, you may get the iterator manually and call
+   * {@link ByteIterator#nextByte()}.
+   *
+   * @return the iterator
+   */
+  public abstract ByteIterator iterator();
+
+  /**
+   * This interface extends {@code Iterator<Byte>}, so that we can return an
+   * unboxed {@code byte}.
+   */
+  public interface ByteIterator extends Iterator<Byte> {
+    /**
+     * An alternative to {@link Iterator#next()} that returns an
+     * unboxed primitive {@code byte}.
+     *
+     * @return the next {@code byte} in the iteration
+     * @throws NoSuchElementException if the iteration has no more elements
+     */
+    byte nextByte();
   }
 
   /**
    * Gets the number of bytes.
+   *
+   * @return size in bytes
    */
-  public int size() {
-    return bytes.length;
-  }
+  public abstract int size();
 
   /**
    * Returns {@code true} if the size is {@code 0}, {@code false} otherwise.
+   *
+   * @return true if this is zero bytes long
    */
   public boolean isEmpty() {
-    return bytes.length == 0;
+    return size() == 0;
   }
 
   // =================================================================
-  // byte[] -> ByteString
+  // ByteString -> substring
 
   /**
-   * Empty ByteString.
+   * Return the substring from {@code beginIndex}, inclusive, to the end of the
+   * string.
+   *
+   * @param beginIndex start at this index
+   * @return substring sharing underlying data
+   * @throws IndexOutOfBoundsException if {@code beginIndex < 0} or
+   *     {@code beginIndex > size()}.
    */
-  public static final ByteString EMPTY = new ByteString(new byte[0]);
+  public ByteString substring(int beginIndex) {
+    return substring(beginIndex, size());
+  }
+
+  /**
+   * Return the substring from {@code beginIndex}, inclusive, to {@code
+   * endIndex}, exclusive.
+   *
+   * @param beginIndex start at this index
+   * @param endIndex   the last character is the one before this index
+   * @return substring sharing underlying data
+   * @throws IndexOutOfBoundsException if {@code beginIndex < 0},
+   *     {@code endIndex > size()}, or {@code beginIndex > endIndex}.
+   */
+  public abstract ByteString substring(int beginIndex, int endIndex);
+
+  /**
+   * Tests if this bytestring starts with the specified prefix.
+   * Similar to {@link String#startsWith(String)}
+   *
+   * @param prefix the prefix.
+   * @return <code>true</code> if the byte sequence represented by the
+   *         argument is a prefix of the byte sequence represented by
+   *         this string; <code>false</code> otherwise.
+   */
+  public boolean startsWith(ByteString prefix) {
+    return size() >= prefix.size() &&
+           substring(0, prefix.size()).equals(prefix);
+  }
+
+  // =================================================================
+  // byte[] -> ByteString
 
   /**
    * Copies the given bytes into a {@code ByteString}.
+   *
+   * @param bytes source array
+   * @param offset offset in source array
+   * @param size number of bytes to copy
+   * @return new {@code ByteString}
    */
-  public static ByteString copyFrom(final byte[] bytes, final int offset,
-                                    final int size) {
-    final byte[] copy = new byte[size];
+  public static ByteString copyFrom(byte[] bytes, int offset, int size) {
+    byte[] copy = new byte[size];
     System.arraycopy(bytes, offset, copy, 0, size);
-    return new ByteString(copy);
+    return new LiteralByteString(copy);
   }
 
   /**
    * Copies the given bytes into a {@code ByteString}.
+   *
+   * @param bytes to copy
+   * @return new {@code ByteString}
    */
-  public static ByteString copyFrom(final byte[] bytes) {
+  public static ByteString copyFrom(byte[] bytes) {
     return copyFrom(bytes, 0, bytes.length);
   }
 
   /**
-   * Copies {@code size} bytes from a {@code java.nio.ByteBuffer} into
+   * Copies the next {@code size} bytes from a {@code java.nio.ByteBuffer} into
    * a {@code ByteString}.
+   *
+   * @param bytes source buffer
+   * @param size number of bytes to copy
+   * @return new {@code ByteString}
    */
-  public static ByteString copyFrom(final ByteBuffer bytes, final int size) {
-    final byte[] copy = new byte[size];
+  public static ByteString copyFrom(ByteBuffer bytes, int size) {
+    byte[] copy = new byte[size];
     bytes.get(copy);
-    return new ByteString(copy);
+    return new LiteralByteString(copy);
   }
 
   /**
    * Copies the remaining bytes from a {@code java.nio.ByteBuffer} into
    * a {@code ByteString}.
+   *
+   * @param bytes source buffer
+   * @return new {@code ByteString}
    */
-  public static ByteString copyFrom(final ByteBuffer bytes) {
+  public static ByteString copyFrom(ByteBuffer bytes) {
     return copyFrom(bytes, bytes.remaining());
   }
 
   /**
    * Encodes {@code text} into a sequence of bytes using the named charset
    * and returns the result as a {@code ByteString}.
+   *
+   * @param text source string
+   * @param charsetName encoding to use
+   * @return new {@code ByteString}
+   * @throws UnsupportedEncodingException if the encoding isn't found
    */
-  public static ByteString copyFrom(final String text, final String charsetName)
+  public static ByteString copyFrom(String text, String charsetName)
       throws UnsupportedEncodingException {
-    return new ByteString(text.getBytes(charsetName));
+    return new LiteralByteString(text.getBytes(charsetName));
   }
 
   /**
    * Encodes {@code text} into a sequence of UTF-8 bytes and returns the
    * result as a {@code ByteString}.
+   *
+   * @param text source string
+   * @return new {@code ByteString}
    */
-  public static ByteString copyFromUtf8(final String text) {
+  public static ByteString copyFromUtf8(String text) {
     try {
-      return new ByteString(text.getBytes("UTF-8"));
+      return new LiteralByteString(text.getBytes("UTF-8"));
     } catch (UnsupportedEncodingException e) {
       throw new RuntimeException("UTF-8 not supported?", e);
     }
   }
 
+  // =================================================================
+  // InputStream -> ByteString
+
   /**
-   * Concatenates all byte strings in the list and returns the result.
+   * Completely reads the given stream's bytes into a
+   * {@code ByteString}, blocking if necessary until all bytes are
+   * read through to the end of the stream.
+   *
+   * <b>Performance notes:</b> The returned {@code ByteString} is an
+   * immutable tree of byte arrays ("chunks") of the stream data.  The
+   * first chunk is small, with subsequent chunks each being double
+   * the size, up to 8K.  If the caller knows the precise length of
+   * the stream and wishes to avoid all unnecessary copies and
+   * allocations, consider using the two-argument version of this
+   * method, below.
+   *
+   * @param streamToDrain The source stream, which is read completely
+   *     but not closed.
+   * @return A new {@code ByteString} which is made up of chunks of
+   *     various sizes, depending on the behavior of the underlying
+   *     stream.
+   * @throws IOException IOException is thrown if there is a problem
+   *     reading the underlying stream.
+   */
+  public static ByteString readFrom(InputStream streamToDrain)
+      throws IOException {
+    return readFrom(
+        streamToDrain, MIN_READ_FROM_CHUNK_SIZE, MAX_READ_FROM_CHUNK_SIZE);
+  }
+
+  /**
+   * Completely reads the given stream's bytes into a
+   * {@code ByteString}, blocking if necessary until all bytes are
+   * read through to the end of the stream.
+   *
+   * <b>Performance notes:</b> The returned {@code ByteString} is an
+   * immutable tree of byte arrays ("chunks") of the stream data.  The
+   * chunkSize parameter sets the size of these byte arrays. In
+   * particular, if the chunkSize is precisely the same as the length
+   * of the stream, unnecessary allocations and copies will be
+   * avoided. Otherwise, the chunks will be of the given size, except
+   * for the last chunk, which will be resized (via a reallocation and
+   * copy) to contain the remainder of the stream.
+   *
+   * @param streamToDrain The source stream, which is read completely
+   *     but not closed.
+   * @param chunkSize The size of the chunks in which to read the
+   *     stream.
+   * @return A new {@code ByteString} which is made up of chunks of
+   *     the given size.
+   * @throws IOException IOException is thrown if there is a problem
+   *     reading the underlying stream.
+   */
+  public static ByteString readFrom(InputStream streamToDrain, int chunkSize)
+      throws IOException {
+    return readFrom(streamToDrain, chunkSize, chunkSize);
+  }
+
+  // Drains the stream in chunks growing from minChunkSize up to maxChunkSize.
+  public static ByteString readFrom(InputStream streamToDrain, int minChunkSize,
+      int maxChunkSize) throws IOException {
+    Collection<ByteString> results = new ArrayList<ByteString>();
+
+    // copy the inbound bytes into a list of chunks; the chunk size
+    // grows exponentially to support both short and long streams.
+    int chunkSize = minChunkSize;
+    while (true) {
+      ByteString chunk = readChunk(streamToDrain, chunkSize);
+      if (chunk == null) {
+        break;
+      }
+      results.add(chunk);
+      chunkSize = (int) Math.min(chunkSize * 2L, maxChunkSize);  // no int wrap
+    }
+
+    return ByteString.copyFrom(results);
+  }
+
+  /**
+   * Blocks until a chunk of the given size can be made from the
+   * stream, or EOF is reached.  Calls read() repeatedly in case the
+   * given stream implementation doesn't completely fill the given
+   * buffer in one read() call.
+   *
+   * @return A chunk of the desired size, or else a chunk as large as
+   * was available when end of stream was reached. Returns null if the
+   * given stream had no more data in it.
+   */
+  private static ByteString readChunk(InputStream in, final int chunkSize)
+      throws IOException {
+    final byte[] chunk = new byte[chunkSize];
+    int filled = 0;
+    // A single read() may deliver fewer bytes than requested; keep asking.
+    while (filled < chunkSize) {
+      final int got = in.read(chunk, filled, chunkSize - filled);
+      if (got == -1) {
+        break;
+      }
+      filled += got;
+    }
+
+    if (filled == 0) {
+      return null;
+    }
+    return ByteString.copyFrom(chunk, 0, filled);
+  }
+
+  // =================================================================
+  // Multiple ByteStrings -> One ByteString
+
+  /**
+   * Concatenate the given {@code ByteString} to this one. Short concatenations,
+   * of total size smaller than {@link ByteString#CONCATENATE_BY_COPY_SIZE}, are
+   * produced by copying the underlying bytes (as per Rope.java, <a
+   * href="http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf">
+   * BAP95</a>). In general, the concatenation involves no copying.
+   *
+   * @param other string to concatenate
+   * @return a new {@code ByteString} instance
+   */
+  public ByteString concat(ByteString other) {
+    int thisSize = size();
+    int otherSize = other.size();
+    if ((long) thisSize + otherSize >= Integer.MAX_VALUE) {
+      throw new IllegalArgumentException("ByteString would be too long: " +
+                                         thisSize + "+" + otherSize);
+    }
+
+    return RopeByteString.concatenate(this, other);
+  }
+
+  /**
+   * Concatenates all byte strings in the iterable and returns the result.
+   * This is designed to run in O(list size), not O(total bytes).
    *
    * <p>The returned {@code ByteString} is not necessarily a unique object.
    * If the list is empty, the returned object is the singleton empty
    * {@code ByteString}.  If the list has only one element, that
    * {@code ByteString} will be returned without copying.
+   *
+   * @param byteStrings strings to be concatenated
+   * @return new {@code ByteString}
    */
-  public static ByteString copyFrom(List<ByteString> list) {
-    if (list.size() == 0) {
-      return EMPTY;
-    } else if (list.size() == 1) {
-      return list.get(0);
+  public static ByteString copyFrom(Iterable<ByteString> byteStrings) {
+    Collection<ByteString> collection;
+    if (!(byteStrings instanceof Collection)) {
+      collection = new ArrayList<ByteString>();
+      for (ByteString byteString : byteStrings) {
+        collection.add(byteString);
+      }
+    } else {
+      collection = (Collection<ByteString>) byteStrings;
     }
-
-    int size = 0;
-    for (ByteString str : list) {
-      size += str.size();
+    ByteString result;
+    if (collection.isEmpty()) {
+      result = EMPTY;
+    } else {
+      result = balancedConcat(collection.iterator(), collection.size());
     }
-    byte[] bytes = new byte[size];
-    int pos = 0;
-    for (ByteString str : list) {
-      System.arraycopy(str.bytes, 0, bytes, pos, str.size());
-      pos += str.size();
+    return result;
+  }
+
+  // Internal function used by copyFrom(Iterable<ByteString>).
+  // Create a balanced concatenation of the next "length" elements from the
+  // iterable.
+  private static ByteString balancedConcat(Iterator<ByteString> iterator,
+      int length) {
+    assert length >= 1;
+    ByteString result;
+    if (length == 1) {
+      result = iterator.next();
+    } else {
+      int halfLength = length >>> 1;
+      ByteString left = balancedConcat(iterator, halfLength);
+      ByteString right = balancedConcat(iterator, length - halfLength);
+      result = left.concat(right);
     }
-    return new ByteString(bytes);
+    return result;
   }
 
   // =================================================================
@@ -174,206 +447,446 @@ public final class ByteString {
    *
    * @param target buffer to copy into
    * @param offset in the target buffer
+   * @throws IndexOutOfBoundsException if the offset is negative or too large
    */
-  public void copyTo(final byte[] target, final int offset) {
-    System.arraycopy(bytes, 0, target, offset, bytes.length);
+  public void copyTo(byte[] target, int offset) {
+    copyTo(target, 0, offset, size());
   }
 
   /**
    * Copies bytes into a buffer.
    *
-   * @param target buffer to copy into
+   * @param target       buffer to copy into
    * @param sourceOffset offset within these bytes
    * @param targetOffset offset within the target buffer
-   * @param size number of bytes to copy
+   * @param numberToCopy number of bytes to copy
+   * @throws IndexOutOfBoundsException if an offset or size is negative or too
+   *     large
    */
-  public void copyTo(final byte[] target, final int sourceOffset,
-                     final int targetOffset,
-      final int size) {
-    System.arraycopy(bytes, sourceOffset, target, targetOffset, size);
+  public void copyTo(byte[] target, int sourceOffset, int targetOffset,
+      int numberToCopy) {
+    if (sourceOffset < 0) {
+      throw new IndexOutOfBoundsException("Source offset < 0: " + sourceOffset);
+    }
+    if (targetOffset < 0) {
+      throw new IndexOutOfBoundsException("Target offset < 0: " + targetOffset);
+    }
+    if (numberToCopy < 0) {
+      throw new IndexOutOfBoundsException("Length < 0: " + numberToCopy);
+    }
+    if (numberToCopy > size() - sourceOffset) {  // subtract: cannot overflow
+      throw new IndexOutOfBoundsException("Source end offset > size: "
+          + ((long) sourceOffset + numberToCopy) + " > " + size());
+    }
+    if (numberToCopy > target.length - targetOffset) {  // cannot overflow
+      throw new IndexOutOfBoundsException("Target end offset > length: "
+          + ((long) targetOffset + numberToCopy) + " > " + target.length);
+    }
+    if (numberToCopy > 0) {
+      copyToInternal(target, sourceOffset, targetOffset, numberToCopy);
+    }
   }
 
   /**
+   * Internal (package private) implementation of
+   * {@link #copyTo(byte[],int,int,int)}.
+   * It assumes that all error checking has already been performed and that
+   * {@code numberToCopy > 0}.
+   */
+  protected abstract void copyToInternal(byte[] target, int sourceOffset,
+      int targetOffset, int numberToCopy);
+
+  /**
    * Copies bytes into a ByteBuffer.
    *
    * @param target ByteBuffer to copy into.
-   * @throws ReadOnlyBufferException if the {@code target} is read-only
-   * @throws BufferOverflowException if the {@code target}'s remaining()
-   *         space is not large enough to hold the data.
+   * @throws java.nio.ReadOnlyBufferException if the {@code target} is read-only
+   * @throws java.nio.BufferOverflowException if the {@code target}'s
+   *     remaining() space is not large enough to hold the data.
    */
-  public void copyTo(ByteBuffer target) {
-    target.put(bytes, 0, bytes.length);
-  }
+  public abstract void copyTo(ByteBuffer target);
 
   /**
    * Copies bytes to a {@code byte[]}.
+   *
+   * @return copied bytes
    */
   public byte[] toByteArray() {
-    final int size = bytes.length;
-    final byte[] copy = new byte[size];
-    System.arraycopy(bytes, 0, copy, 0, size);
-    return copy;
+    int size = size();
+    byte[] result = new byte[size];
+    copyToInternal(result, 0, 0, size);
+    return result;
   }
 
   /**
-   * Constructs a new read-only {@code java.nio.ByteBuffer} with the
-   * same backing byte array.
+   * Writes the complete contents of this byte string to
+   * the specified output stream argument.
+   *
+   * @param  out  the output stream to which to write the data.
+   * @throws IOException  if an I/O error occurs.
    */
-  public ByteBuffer asReadOnlyByteBuffer() {
-    final ByteBuffer byteBuffer = ByteBuffer.wrap(bytes);
-    return byteBuffer.asReadOnlyBuffer();
-  }
+  public abstract void writeTo(OutputStream out) throws IOException;
+
+  /**
+   * Constructs a read-only {@code java.nio.ByteBuffer} whose content
+   * is equal to the contents of this byte string.
+   * The result uses the same backing array as the byte string, if possible.
+   *
+   * @return wrapped bytes
+   */
+  public abstract ByteBuffer asReadOnlyByteBuffer();
+
+  /**
+   * Constructs a list of read-only {@code java.nio.ByteBuffer} objects
+   * such that the concatenation of their contents is equal to the contents
+   * of this byte string.  The result uses the same backing arrays as the
+   * byte string.
+   * <p>
+   * By returning a list, implementations of this method may be able to avoid
+   * copying even when there are multiple backing arrays.
+   * 
+   * @return a list of wrapped bytes
+   */
+  public abstract List<ByteBuffer> asReadOnlyByteBufferList();
 
   /**
    * Constructs a new {@code String} by decoding the bytes using the
    * specified charset.
+   *
+   * @param charsetName encode using this charset
+   * @return new string
+   * @throws UnsupportedEncodingException if charset isn't recognized
    */
-  public String toString(final String charsetName)
-      throws UnsupportedEncodingException {
-    return new String(bytes, charsetName);
-  }
+  public abstract String toString(String charsetName)
+      throws UnsupportedEncodingException;
+
+  // =================================================================
+  // UTF-8 decoding
 
   /**
    * Constructs a new {@code String} by decoding the bytes as UTF-8.
+   *
+   * @return new string using UTF-8 encoding
    */
   public String toStringUtf8() {
     try {
-      return new String(bytes, "UTF-8");
+      return toString("UTF-8");
     } catch (UnsupportedEncodingException e) {
       throw new RuntimeException("UTF-8 not supported?", e);
     }
   }
 
+  /**
+   * Tells whether this {@code ByteString} represents a well-formed UTF-8
+   * byte sequence, such that the original bytes can be converted to a
+   * String object and then round tripped back to bytes without loss.
+   *
+   * <p>More precisely, returns {@code true} whenever: <pre> {@code
+   * Arrays.equals(byteString.toByteArray(),
+   *     new String(byteString.toByteArray(), "UTF-8").getBytes("UTF-8"))
+   * }</pre>
+   *
+   * <p>This method returns {@code false} for "overlong" byte sequences,
+   * as well as for 3-byte sequences that would map to a surrogate
+   * character, in accordance with the restricted definition of UTF-8
+   * introduced in Unicode 3.1.  Note that the UTF-8 decoder included in
+   * Oracle's JDK has been modified to also reject "overlong" byte
+   * sequences, but (as of 2011) still accepts 3-byte surrogate
+   * character byte sequences.
+   *
+   * <p>See the Unicode Standard,<br>
+   * Table 3-6. <em>UTF-8 Bit Distribution</em>,<br>
+   * Table 3-7. <em>Well Formed UTF-8 Byte Sequences</em>.
+   *
+   * @return whether the bytes in this {@code ByteString} are a
+   * well-formed UTF-8 byte sequence
+   */
+  public abstract boolean isValidUtf8();
+
+  /**
+   * Tells whether the given byte sequence is a well-formed, malformed, or
+   * incomplete UTF-8 byte sequence.  This method accepts and returns a partial
+   * state result, allowing the bytes for a complete UTF-8 byte sequence to be
+   * composed from multiple {@code ByteString} segments.
+   *
+   * @param state either {@code 0} (if this is the initial decoding operation)
+   *     or the value returned from a call to a partial decoding method for the
+   *     previous bytes
+   * @param offset offset of the first byte to check
+   * @param length number of bytes to check
+   *
+   * @return {@code -1} if the partial byte sequence is definitely malformed,
+   * {@code 0} if it is well-formed (no additional input needed), or, if the
+   * byte sequence is "incomplete", i.e. apparently terminated in the middle of
+   * a character, an opaque integer "state" value containing enough information
+   * to decode the character when passed to a subsequent invocation of a
+   * partial decoding method.
+   */
+  protected abstract int partialIsValidUtf8(int state, int offset, int length);
+
   // =================================================================
   // equals() and hashCode()
 
   @Override
-  public boolean equals(final Object o) {
-    if (o == this) {
-      return true;
-    }
-
-    if (!(o instanceof ByteString)) {
-      return false;
-    }
-
-    final ByteString other = (ByteString) o;
-    final int size = bytes.length;
-    if (size != other.bytes.length) {
-      return false;
-    }
-
-    final byte[] thisBytes = bytes;
-    final byte[] otherBytes = other.bytes;
-    for (int i = 0; i < size; i++) {
-      if (thisBytes[i] != otherBytes[i]) {
-        return false;
-      }
-    }
-
-    return true;
-  }
-
-  private volatile int hash = 0;
+  public abstract boolean equals(Object o);
 
+  /**
+   * Return a non-zero hashCode depending only on the sequence of bytes
+   * in this ByteString.
+   *
+   * @return hashCode value for this object
+   */
   @Override
-  public int hashCode() {
-    int h = hash;
-
-    if (h == 0) {
-      final byte[] thisBytes = bytes;
-      final int size = bytes.length;
-
-      h = size;
-      for (int i = 0; i < size; i++) {
-        h = h * 31 + thisBytes[i];
-      }
-      if (h == 0) {
-        h = 1;
-      }
-
-      hash = h;
-    }
-
-    return h;
-  }
+  public abstract int hashCode();
 
   // =================================================================
   // Input stream
 
   /**
    * Creates an {@code InputStream} which can be used to read the bytes.
+   * <p>
+   * The {@link InputStream} returned by this method is guaranteed to be
+   * completely non-blocking.  The method {@link InputStream#available()}
+   * returns the number of bytes remaining in the stream. The methods
+   * {@link InputStream#read(byte[])}, {@link InputStream#read(byte[],int,int)}
+   * and {@link InputStream#skip(long)} will read/skip as many bytes as are
+   * available.
+   * <p>
+   * The methods in the returned {@link InputStream} might <b>not</b> be
+   * thread safe.
+   *
+   * @return an input stream that returns the bytes of this byte string.
    */
-  public InputStream newInput() {
-    return new ByteArrayInputStream(bytes);
-  }
+  public abstract InputStream newInput();
 
   /**
    * Creates a {@link CodedInputStream} which can be used to read the bytes.
-   * Using this is more efficient than creating a {@link CodedInputStream}
-   * wrapping the result of {@link #newInput()}.
+   * Using this is often more efficient than creating a {@link CodedInputStream}
+   * that wraps the result of {@link #newInput()}.
+   *
+   * @return stream based on wrapped data
    */
-  public CodedInputStream newCodedInput() {
-    // We trust CodedInputStream not to modify the bytes, or to give anyone
-    // else access to them.
-    return CodedInputStream.newInstance(bytes);
-  }
+  public abstract CodedInputStream newCodedInput();
 
   // =================================================================
   // Output stream
 
   /**
-   * Creates a new {@link Output} with the given initial capacity.
+   * Creates a new {@link Output} with the given initial capacity. Call {@link
+   * Output#toByteString()} to create the {@code ByteString} instance.
+   * <p>
+   * A {@link ByteString.Output} offers the same functionality as a
+   * {@link ByteArrayOutputStream}, except that it returns a {@link ByteString}
+   * rather than a {@code byte} array.
+   *
+   * @param initialCapacity estimate of number of bytes to be written
+   * @return {@code OutputStream} for building a {@code ByteString}
    */
-  public static Output newOutput(final int initialCapacity) {
-    return new Output(new ByteArrayOutputStream(initialCapacity));
+  public static Output newOutput(int initialCapacity) {
+    return new Output(initialCapacity);
   }
 
   /**
-   * Creates a new {@link Output}.
+   * Creates a new {@link Output}. Call {@link Output#toByteString()} to create
+   * the {@code ByteString} instance.
+   * <p>
+   * A {@link ByteString.Output} offers the same functionality as a
+   * {@link ByteArrayOutputStream}, except that it returns a {@link ByteString}
+   * rather than a {@code byte} array.
+   *
+   * @return {@code OutputStream} for building a {@code ByteString}
    */
   public static Output newOutput() {
-    return newOutput(32);
+    return new Output(CONCATENATE_BY_COPY_SIZE);
   }
 
   /**
    * Outputs to a {@code ByteString} instance. Call {@link #toByteString()} to
    * create the {@code ByteString} instance.
    */
-  public static final class Output extends FilterOutputStream {
-    private final ByteArrayOutputStream bout;
+  public static final class Output extends OutputStream {
+    // Implementation note.
+    // The public methods of this class must be synchronized.  ByteStrings
+    // are guaranteed to be immutable.  Without some sort of locking, it could
+    // be possible for one thread to call toByteString(), while another thread
+    // is still modifying the underlying byte array.
+
+    private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
+    // argument passed by user, indicating initial capacity.
+    private final int initialCapacity;
+    // ByteStrings to be concatenated to create the result
+    private final ArrayList<ByteString> flushedBuffers;
+    // Total number of bytes in the ByteStrings of flushedBuffers
+    private int flushedBuffersTotalBytes;
+    // Current buffer to which we are writing
+    private byte[] buffer;
+    // Location in buffer[] to which we write the next byte.
+    private int bufferPos;
 
     /**
-     * Constructs a new output with the given initial capacity.
+     * Creates a new ByteString output stream with the specified
+     * initial capacity.
+     *
+     * @param initialCapacity  the initial capacity of the output stream.
      */
-    private Output(final ByteArrayOutputStream bout) {
-      super(bout);
-      this.bout = bout;
+    Output(int initialCapacity) {
+      if (initialCapacity < 0) {
+        throw new IllegalArgumentException("Buffer size < 0");
+      }
+      this.initialCapacity = initialCapacity;
+      this.flushedBuffers = new ArrayList<ByteString>();
+      this.buffer = new byte[initialCapacity];
+    }
+
+    @Override
+    public synchronized void write(int b) {
+      if (bufferPos == buffer.length) {
+        flushFullBuffer(1);
+      }
+      buffer[bufferPos++] = (byte)b;
+    }
+
+    @Override
+    public synchronized void write(byte[] b, int offset, int length)  {
+      if (length <= buffer.length - bufferPos) {
+        // The bytes can fit into the current buffer.
+        System.arraycopy(b, offset, buffer, bufferPos, length);
+        bufferPos += length;
+      } else {
+        // Use up the current buffer
+        int copySize  = buffer.length - bufferPos;
+        System.arraycopy(b, offset, buffer, bufferPos, copySize);
+        offset += copySize;
+        length -= copySize;
+        // Flush the buffer, and get a new buffer at least big enough to cover
+        // what we still need to output
+        flushFullBuffer(length);
+        System.arraycopy(b, offset, buffer, 0 /* count */, length);
+        bufferPos = length;
+      }
+    }
+
+    /**
+     * Creates a byte string. Its size is the current size of this output
+     * stream and its output has been copied to it.
+     *
+     * @return  the current contents of this output stream, as a byte string.
+     */
+    public synchronized ByteString toByteString() {
+      flushLastBuffer();
+      return ByteString.copyFrom(flushedBuffers);
+    }
+
+    /**
+     * Writes the complete contents of this byte string output stream to
+     * the specified output stream argument.
+     *
+     * @param out the output stream to which to write the data.
+     * @throws IOException  if an I/O error occurs.
+     */
+    public void writeTo(OutputStream out) throws IOException {
+      ByteString[] cachedFlushBuffers;
+      byte[] cachedBuffer;
+      int cachedBufferPos;
+      synchronized (this) {
+        // Copy the information we need into local variables so as to hold
+        // the lock for as short a time as possible.
+        cachedFlushBuffers =
+            flushedBuffers.toArray(new ByteString[flushedBuffers.size()]);
+        cachedBuffer = buffer;
+        cachedBufferPos = bufferPos;
+      }
+      for (ByteString byteString : cachedFlushBuffers) {
+        byteString.writeTo(out);
+      }
+
+      out.write(Arrays.copyOf(cachedBuffer, cachedBufferPos));
+    }
+
+    /**
+     * Returns the current size of the output stream.
+     *
+     * @return  the current size of the output stream
+     */
+    public synchronized int size() {
+      return flushedBuffersTotalBytes + bufferPos;
+    }
+
+    /**
+     * Resets this stream, so that all currently accumulated output in the
+     * output stream is discarded. The output stream can be used again,
+     * reusing the already allocated buffer space.
+     */
+    public synchronized void reset() {
+      flushedBuffers.clear();
+      flushedBuffersTotalBytes = 0;
+      bufferPos = 0;
+    }
+
+    @Override
+    public String toString() {
+      return String.format("<ByteString.Output@%s size=%d>",
+          Integer.toHexString(System.identityHashCode(this)), size());
     }
 
     /**
-     * Creates a {@code ByteString} instance from this {@code Output}.
+     * Internal function used by writers.  The current buffer is full, and the
+     * writer needs a new buffer whose size is at least the specified minimum
+     * size.
      */
-    public ByteString toByteString() {
-      final byte[] byteArray = bout.toByteArray();
-      return new ByteString(byteArray);
+    private void flushFullBuffer(int minSize)  {
+      flushedBuffers.add(new LiteralByteString(buffer));
+      flushedBuffersTotalBytes += buffer.length;
+      // We want to increase our total capacity by 50%, but as a minimum,
+      // the new buffer should also at least be >= minSize and
+      // >= initial Capacity.
+      int newSize = Math.max(initialCapacity,
+          Math.max(minSize, flushedBuffersTotalBytes >>> 1));
+      buffer = new byte[newSize];
+      bufferPos = 0;
+    }
+
+    /**
+     * Internal function used by {@link #toByteString()}. The current buffer may
+     * or may not be full, but it needs to be flushed.
+     */
+    private void flushLastBuffer()  {
+      if (bufferPos < buffer.length) {
+        if (bufferPos > 0) {
+          byte[] bufferCopy = Arrays.copyOf(buffer, bufferPos);
+          flushedBuffers.add(new LiteralByteString(bufferCopy));
+        }
+        // We reuse this buffer for further writes.
+      } else {
+        // Buffer is completely full.  Huzzah.
+        flushedBuffers.add(new LiteralByteString(buffer));
+        // 99% of the time, we're not going to use this OutputStream again.
+        // We set buffer to an empty byte array so that we're handling this
+        // case without wasting space.  In the rare case that more writes
+        // *do* occur, this empty buffer will be flushed and an appropriately
+        // sized new buffer will be created.
+        buffer = EMPTY_BYTE_ARRAY;
+      }
+      flushedBuffersTotalBytes += bufferPos;
+      bufferPos = 0;
     }
   }
 
   /**
-   * Constructs a new ByteString builder, which allows you to efficiently
-   * construct a {@code ByteString} by writing to a {@link CodedOutputStream}.
-   * Using this is much more efficient than calling {@code newOutput()} and
-   * wrapping that in a {@code CodedOutputStream}.
+   * Constructs a new {@code ByteString} builder, which allows you to
+   * efficiently construct a {@code ByteString} by writing to a {@link
+   * CodedOutputStream}. Using this is much more efficient than calling {@code
+   * newOutput()} and wrapping that in a {@code CodedOutputStream}.
    *
    * <p>This is package-private because it's a somewhat confusing interface.
    * Users can call {@link Message#toByteString()} instead of calling this
    * directly.
    *
-   * @param size The target byte size of the {@code ByteString}.  You must
-   *             write exactly this many bytes before building the result.
+   * @param size The target byte size of the {@code ByteString}.  You must write
+   *     exactly this many bytes before building the result.
+   * @return the builder
    */
-  static CodedBuilder newCodedBuilder(final int size) {
+  static CodedBuilder newCodedBuilder(int size) {
     return new CodedBuilder(size);
   }
 
@@ -382,7 +895,7 @@ public final class ByteString {
     private final CodedOutputStream output;
     private final byte[] buffer;
 
-    private CodedBuilder(final int size) {
+    private CodedBuilder(int size) {
       buffer = new byte[size];
       output = CodedOutputStream.newInstance(buffer);
     }
@@ -393,11 +906,57 @@ public final class ByteString {
       // We can be confident that the CodedOutputStream will not modify the
       // underlying bytes anymore because it already wrote all of them.  So,
       // no need to make a copy.
-      return new ByteString(buffer);
+      return new LiteralByteString(buffer);
     }
 
     public CodedOutputStream getCodedOutput() {
       return output;
     }
   }
+
+  // =================================================================
+  // Methods {@link RopeByteString} needs on instances, which aren't part of the
+  // public API.
+
+  /**
+   * Return the depth of the tree representing this {@code ByteString}, if any,
+   * whose root is this node. If this is a leaf node, return 0.
+   *
+   * @return tree depth or zero
+   */
+  protected abstract int getTreeDepth();
+
+  /**
+   * Return {@code true} if this ByteString is literal (a leaf node) or a
+   * flat-enough tree in the sense of {@link RopeByteString}.
+   *
+   * @return true if the tree is flat enough
+   */
+  protected abstract boolean isBalanced();
+
+  /**
+   * Return the cached hash code if available.
+   *
+   * @return value of cached hash code or 0 if not computed yet
+   */
+  protected abstract int peekCachedHashCode();
+
+  /**
+   * Compute the hash across the value bytes starting with the given hash, and
+   * return the result.  This is used to compute the hash across strings
+   * represented as a set of pieces by allowing the hash computation to be
+   * continued from piece to piece.
+   *
+   * @param h starting hash value
+   * @param offset offset into this value to start looking at data values
+   * @param length number of data values to include in the hash computation
+   * @return ending hash value
+   */
+  protected abstract int partialHash(int h, int offset, int length);
+
+  @Override
+  public String toString() {
+    return String.format("<ByteString@%s size=%d>",
+        Integer.toHexString(System.identityHashCode(this)), size());
+  }
 }
diff --git a/java/src/main/java/com/google/protobuf/CodedInputStream.java b/java/src/main/java/com/google/protobuf/CodedInputStream.java
index b3e08555..33417a7f 100644
--- a/java/src/main/java/com/google/protobuf/CodedInputStream.java
+++ b/java/src/main/java/com/google/protobuf/CodedInputStream.java
@@ -243,6 +243,23 @@ public final class CodedInputStream {
     --recursionDepth;
   }
 
+  /** Read a {@code group} field value from the stream. */
+  public <T extends MessageLite> T readGroup(
+      final int fieldNumber,
+      final Parser<T> parser,
+      final ExtensionRegistryLite extensionRegistry)
+      throws IOException {
+    if (recursionDepth >= recursionLimit) {
+      throw InvalidProtocolBufferException.recursionLimitExceeded();
+    }
+    ++recursionDepth;
+    T result = parser.parsePartialFrom(this, extensionRegistry);
+    checkLastTagWas(
+      WireFormat.makeTag(fieldNumber, WireFormat.WIRETYPE_END_GROUP));
+    --recursionDepth;
+    return result;
+  }
+
   /**
    * Reads a {@code group} field value from the stream and merges it into the
    * given {@link UnknownFieldSet}.
@@ -278,6 +295,24 @@ public final class CodedInputStream {
     popLimit(oldLimit);
   }
 
+  /** Read an embedded message field value from the stream. */
+  public <T extends MessageLite> T readMessage(
+      final Parser<T> parser,
+      final ExtensionRegistryLite extensionRegistry)
+      throws IOException {
+    int length = readRawVarint32();
+    if (recursionDepth >= recursionLimit) {
+      throw InvalidProtocolBufferException.recursionLimitExceeded();
+    }
+    final int oldLimit = pushLimit(length);
+    ++recursionDepth;
+    T result = parser.parsePartialFrom(this, extensionRegistry);
+    checkLastTagWas(0);
+    --recursionDepth;
+    popLimit(oldLimit);
+    return result;
+  }
+
   /** Read a {@code bytes} field value from the stream. */
   public ByteString readBytes() throws IOException {
     final int size = readRawVarint32();
@@ -601,7 +636,7 @@ public final class CodedInputStream {
    * refreshing its buffer.  If you need to prevent reading past a certain
    * point in the underlying {@code InputStream} (e.g. because you expect it to
    * contain more data after the end of the message which you need to handle
-   * differently) then you must place a wrapper around you {@code InputStream}
+   * differently) then you must place a wrapper around your {@code InputStream}
    * which limits the amount of data that can be read from it.
    *
    * @return the old limit.
@@ -676,7 +711,7 @@ public final class CodedInputStream {
 
   /**
    * Called with {@code this.buffer} is empty to read more bytes from the
-   * input.  If {@code mustSucceed} is true, refillBuffer() gurantees that
+   * input.  If {@code mustSucceed} is true, refillBuffer() guarantees that
    * either there will be at least one byte in the buffer when it returns
    * or it will throw an exception.  If {@code mustSucceed} is false,
    * refillBuffer() returns false if no more bytes were available.
@@ -879,7 +914,7 @@ public final class CodedInputStream {
         refillBuffer(true);
       }
 
-      bufferPos = size - pos; 
+      bufferPos = size - pos;
     }
   }
 }
diff --git a/java/src/main/java/com/google/protobuf/CodedOutputStream.java b/java/src/main/java/com/google/protobuf/CodedOutputStream.java
index ac5f2d30..ca24638d 100644
--- a/java/src/main/java/com/google/protobuf/CodedOutputStream.java
+++ b/java/src/main/java/com/google/protobuf/CodedOutputStream.java
@@ -30,10 +30,10 @@
 
 package com.google.protobuf;
 
-import java.io.OutputStream;
 import java.io.IOException;
-import java.io.UnsupportedEncodingException;
 import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
 
 /**
  * Encodes and writes protocol message fields.
@@ -540,6 +540,15 @@ public final class CodedOutputStream {
   }
 
   /**
+   * Compute the number of bytes that would be needed to encode an
+   * embedded message in a lazy field, including tag.
+   */
+  public static int computeLazyFieldSize(final int fieldNumber,
+                                         final LazyField value) {
+    return computeTagSize(fieldNumber) + computeLazyFieldSizeNoTag(value);
+  }
+
+  /**
    * Compute the number of bytes that would be needed to encode a
    * {@code uint32} field, including tag.
    */
@@ -614,6 +623,18 @@ public final class CodedOutputStream {
            computeBytesSize(WireFormat.MESSAGE_SET_MESSAGE, value);
   }
 
+  /**
+   * Compute the number of bytes that would be needed to encode a
+   * lazily parsed MessageSet extension field to the stream.  For
+   * historical reasons, the wire format differs from normal fields.
+   */
+  public static int computeLazyFieldMessageSetExtensionSize(
+      final int fieldNumber, final LazyField value) {
+    return computeTagSize(WireFormat.MESSAGE_SET_ITEM) * 2 +
+           computeUInt32Size(WireFormat.MESSAGE_SET_TYPE_ID, fieldNumber) +
+           computeLazyFieldSize(WireFormat.MESSAGE_SET_MESSAGE, value);
+  }
+  
   // -----------------------------------------------------------------
 
   /**
@@ -730,6 +751,15 @@ public final class CodedOutputStream {
   }
 
   /**
+   * Compute the number of bytes that would be needed to encode an embedded
+   * message stored in a lazy field.
+   */
+  public static int computeLazyFieldSizeNoTag(final LazyField value) {
+    final int size = value.getSerializedSize();
+    return computeRawVarint32Size(size) + size;
+  }
+
+  /**
    * Compute the number of bytes that would be needed to encode a
    * {@code bytes} field.
    */
diff --git a/java/src/main/java/com/google/protobuf/Descriptors.java b/java/src/main/java/com/google/protobuf/Descriptors.java
index 2ee84594..a4913053 100644
--- a/java/src/main/java/com/google/protobuf/Descriptors.java
+++ b/java/src/main/java/com/google/protobuf/Descriptors.java
@@ -35,8 +35,10 @@ import com.google.protobuf.DescriptorProtos.*;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.io.UnsupportedEncodingException;
 
 /**
@@ -106,6 +108,11 @@ public final class Descriptors {
       return Collections.unmodifiableList(Arrays.asList(dependencies));
     }
 
+    /** Get a list of this file's public dependencies (public imports). */
+    public List<FileDescriptor> getPublicDependencies() {
+      return Collections.unmodifiableList(Arrays.asList(publicDependencies));
+    }
+
     /**
      * Find a message type in the file by name.  Does not find nested types.
      *
@@ -216,7 +223,7 @@ public final class Descriptors {
     public static FileDescriptor buildFrom(final FileDescriptorProto proto,
                                            final FileDescriptor[] dependencies)
                                     throws DescriptorValidationException {
-      // Building decsriptors involves two steps:  translating and linking.
+      // Building descriptors involves two steps:  translating and linking.
       // In the translation step (implemented by FileDescriptor's
       // constructor), we build an object tree mirroring the
       // FileDescriptorProto's tree and put all of the descriptors into the
@@ -317,12 +324,12 @@ public final class Descriptors {
      * {@link FileDescriptor#internalBuildGeneratedFileFrom}, the caller
      * provides a callback implementing this interface.  The callback is called
      * after the FileDescriptor has been constructed, in order to assign all
-     * the global variales defined in the generated code which point at parts
+     * the global variables defined in the generated code which point at parts
      * of the FileDescriptor.  The callback returns an ExtensionRegistry which
      * contains any extensions which might be used in the descriptor -- that
      * is, extensions of the various "Options" messages defined in
      * descriptor.proto.  The callback may also return null to indicate that
-     * no extensions are used in the decsriptor.
+     * no extensions are used in the descriptor.
      */
     public interface InternalDescriptorAssigner {
       ExtensionRegistry assignDescriptors(FileDescriptor root);
@@ -334,6 +341,7 @@ public final class Descriptors {
     private final ServiceDescriptor[] services;
     private final FieldDescriptor[] extensions;
     private final FileDescriptor[] dependencies;
+    private final FileDescriptor[] publicDependencies;
     private final DescriptorPool pool;
 
     private FileDescriptor(final FileDescriptorProto proto,
@@ -343,6 +351,17 @@ public final class Descriptors {
       this.pool = pool;
       this.proto = proto;
       this.dependencies = dependencies.clone();
+      this.publicDependencies =
+          new FileDescriptor[proto.getPublicDependencyCount()];
+      for (int i = 0; i < proto.getPublicDependencyCount(); i++) {
+        int index = proto.getPublicDependency(i);
+        if (index < 0 || index >= this.dependencies.length) {
+          throw new DescriptorValidationException(this,
+              "Invalid public dependency index.");
+        }
+        this.publicDependencies[i] =
+            this.dependencies[proto.getPublicDependency(i)];
+      }
 
       pool.addPackage(getPackage(), this);
 
@@ -390,7 +409,7 @@ public final class Descriptors {
      * in the original.  This method is needed for bootstrapping when a file
      * defines custom options.  The options may be defined in the file itself,
      * so we can't actually parse them until we've constructed the descriptors,
-     * but to construct the decsriptors we have to have parsed the descriptor
+     * but to construct the descriptors we have to have parsed the descriptor
      * protos.  So, we have to parse the descriptor protos a second time after
      * constructing the descriptors.
      */
@@ -641,7 +660,7 @@ public final class Descriptors {
                  FieldSet.FieldDescriptorLite<FieldDescriptor> {
     /**
      * Get the index of this descriptor within its parent.
-     * @see Descriptor#getIndex()
+     * @see Descriptors.Descriptor#getIndex()
      */
     public int getIndex() { return index; }
 
@@ -656,7 +675,7 @@ public final class Descriptors {
 
     /**
      * Get the field's fully-qualified name.
-     * @see Descriptor#getFullName()
+     * @see Descriptors.Descriptor#getFullName()
      */
     public String getFullName() { return fullName; }
 
@@ -943,7 +962,8 @@ public final class Descriptors {
     private void crossLink() throws DescriptorValidationException {
       if (proto.hasExtendee()) {
         final GenericDescriptor extendee =
-          file.pool.lookupSymbol(proto.getExtendee(), this);
+          file.pool.lookupSymbol(proto.getExtendee(), this,
+              DescriptorPool.SearchFilter.TYPES_ONLY);
         if (!(extendee instanceof Descriptor)) {
           throw new DescriptorValidationException(this,
               '\"' + proto.getExtendee() + "\" is not a message type.");
@@ -960,7 +980,8 @@ public final class Descriptors {
 
       if (proto.hasTypeName()) {
         final GenericDescriptor typeDescriptor =
-          file.pool.lookupSymbol(proto.getTypeName(), this);
+          file.pool.lookupSymbol(proto.getTypeName(), this,
+              DescriptorPool.SearchFilter.TYPES_ONLY);
 
         if (!proto.hasType()) {
           // Choose field type based on symbol.
@@ -1149,7 +1170,7 @@ public final class Descriptors {
       implements GenericDescriptor, Internal.EnumLiteMap<EnumValueDescriptor> {
     /**
      * Get the index of this descriptor within its parent.
-     * @see Descriptor#getIndex()
+     * @see Descriptors.Descriptor#getIndex()
      */
     public int getIndex() { return index; }
 
@@ -1161,7 +1182,7 @@ public final class Descriptors {
 
     /**
      * Get the type's fully-qualified name.
-     * @see Descriptor#getFullName()
+     * @see Descriptors.Descriptor#getFullName()
      */
     public String getFullName() { return fullName; }
 
@@ -1182,7 +1203,7 @@ public final class Descriptors {
     /**
      * Find an enum value by name.
      * @param name The unqualified name of the value (e.g. "FOO").
-     * @return the value's decsriptor, or {@code null} if not found.
+     * @return the value's descriptor, or {@code null} if not found.
      */
     public EnumValueDescriptor findValueByName(final String name) {
       final GenericDescriptor result =
@@ -1198,7 +1219,7 @@ public final class Descriptors {
      * Find an enum value by number.  If multiple enum values have the same
      * number, this returns the first defined value with that number.
      * @param number The value's number.
-     * @return the value's decsriptor, or {@code null} if not found.
+     * @return the value's descriptor, or {@code null} if not found.
      */
     public EnumValueDescriptor findValueByNumber(final int number) {
       return file.pool.enumValuesByNumber.get(
@@ -1261,7 +1282,7 @@ public final class Descriptors {
       implements GenericDescriptor, Internal.EnumLite {
     /**
      * Get the index of this descriptor within its parent.
-     * @see Descriptor#getIndex()
+     * @see Descriptors.Descriptor#getIndex()
      */
     public int getIndex() { return index; }
 
@@ -1276,7 +1297,7 @@ public final class Descriptors {
 
     /**
      * Get the value's fully-qualified name.
-     * @see Descriptor#getFullName()
+     * @see Descriptors.Descriptor#getFullName()
      */
     public String getFullName() { return fullName; }
 
@@ -1337,7 +1358,7 @@ public final class Descriptors {
 
     /**
      * Get the type's fully-qualified name.
-     * @see Descriptor#getFullName()
+     * @see Descriptors.Descriptor#getFullName()
      */
     public String getFullName() { return fullName; }
 
@@ -1355,7 +1376,7 @@ public final class Descriptors {
     /**
      * Find a method by name.
      * @param name The unqualified name of the method (e.g. "Foo").
-     * @return the method's decsriptor, or {@code null} if not found.
+     * @return the method's descriptor, or {@code null} if not found.
      */
     public MethodDescriptor findMethodByName(final String name) {
       final GenericDescriptor result =
@@ -1427,7 +1448,7 @@ public final class Descriptors {
 
     /**
      * Get the method's fully-qualified name.
-     * @see Descriptor#getFullName()
+     * @see Descriptors.Descriptor#getFullName()
      */
     public String getFullName() { return fullName; }
 
@@ -1475,7 +1496,8 @@ public final class Descriptors {
 
     private void crossLink() throws DescriptorValidationException {
       final GenericDescriptor input =
-        file.pool.lookupSymbol(proto.getInputType(), this);
+        file.pool.lookupSymbol(proto.getInputType(), this,
+            DescriptorPool.SearchFilter.TYPES_ONLY);
       if (!(input instanceof Descriptor)) {
         throw new DescriptorValidationException(this,
             '\"' + proto.getInputType() + "\" is not a message type.");
@@ -1483,7 +1505,8 @@ public final class Descriptors {
       inputType = (Descriptor)input;
 
       final GenericDescriptor output =
-        file.pool.lookupSymbol(proto.getOutputType(), this);
+        file.pool.lookupSymbol(proto.getOutputType(), this,
+            DescriptorPool.SearchFilter.TYPES_ONLY);
       if (!(output instanceof Descriptor)) {
         throw new DescriptorValidationException(this,
             '\"' + proto.getOutputType() + "\" is not a message type.");
@@ -1535,7 +1558,7 @@ public final class Descriptors {
     public String getProblemSymbolName() { return name; }
 
     /**
-     * Gets the the protocol message representation of the invalid descriptor.
+     * Gets the protocol message representation of the invalid descriptor.
      */
     public Message getProblemProto() { return proto; }
 
@@ -1590,14 +1613,22 @@ public final class Descriptors {
    * descriptors defined in a particular file.
    */
   private static final class DescriptorPool {
+    
+    /** Defines what subclass of descriptors to search in the descriptor pool. 
+     */
+    enum SearchFilter {
+      TYPES_ONLY, AGGREGATES_ONLY, ALL_SYMBOLS
+    }
+    
     DescriptorPool(final FileDescriptor[] dependencies) {
-      this.dependencies = new DescriptorPool[dependencies.length];
+      this.dependencies = new HashSet<FileDescriptor>();
 
-      for (int i = 0; i < dependencies.length; i++)  {
-        this.dependencies[i] = dependencies[i].pool;
+      for (int i = 0; i < dependencies.length; i++) {
+        this.dependencies.add(dependencies[i]);
+        importPublicDependencies(dependencies[i]);
       }
 
-      for (final FileDescriptor dependency : dependencies) {
+      for (final FileDescriptor dependency : this.dependencies) {
         try {
           addPackage(dependency.getPackage(), dependency);
         } catch (DescriptorValidationException e) {
@@ -1609,7 +1640,16 @@ public final class Descriptors {
       }
     }
 
-    private final DescriptorPool[] dependencies;
+    /** Find and put public dependencies of the file into dependencies set.*/
+    private void importPublicDependencies(final FileDescriptor file) {
+      for (FileDescriptor dependency : file.getPublicDependencies()) {
+        if (dependencies.add(dependency)) {
+          importPublicDependencies(dependency);
+        }
+      }
+    }
+
+    private final Set<FileDescriptor> dependencies;
 
     private final Map<String, GenericDescriptor> descriptorsByName =
       new HashMap<String, GenericDescriptor>();
@@ -1620,39 +1660,81 @@ public final class Descriptors {
 
     /** Find a generic descriptor by fully-qualified name. */
     GenericDescriptor findSymbol(final String fullName) {
+      return findSymbol(fullName, SearchFilter.ALL_SYMBOLS);
+    }
+    
+    /** Find a descriptor by fully-qualified name, returning it only if it
+     * passes the given search filter (e.g. only valid field types).
+     */
+    GenericDescriptor findSymbol(final String fullName,
+                                 final SearchFilter filter) {
       GenericDescriptor result = descriptorsByName.get(fullName);
       if (result != null) {
-        return result;
+        if ((filter==SearchFilter.ALL_SYMBOLS) ||
+            ((filter==SearchFilter.TYPES_ONLY) && isType(result)) ||
+            ((filter==SearchFilter.AGGREGATES_ONLY) && isAggregate(result))) {
+          return result;
+        }
       }
 
-      for (final DescriptorPool dependency : dependencies) {
-        result = dependency.descriptorsByName.get(fullName);
+      for (final FileDescriptor dependency : dependencies) {
+        result = dependency.pool.descriptorsByName.get(fullName);
         if (result != null) {
-          return result;
+          if ((filter==SearchFilter.ALL_SYMBOLS) ||
+              ((filter==SearchFilter.TYPES_ONLY) && isType(result)) ||
+              ((filter==SearchFilter.AGGREGATES_ONLY) && isAggregate(result))) {
+            return result;
+          }
         }
       }
 
       return null;
     }
 
+    /** Checks if the descriptor is a valid type for a message field. */
+    boolean isType(GenericDescriptor descriptor) {
+      return (descriptor instanceof Descriptor) || 
+        (descriptor instanceof EnumDescriptor);
+    }
+    
+    /** Checks if the descriptor is a valid namespace type. */
+    boolean isAggregate(GenericDescriptor descriptor) {
+      return (descriptor instanceof Descriptor) || 
+        (descriptor instanceof EnumDescriptor) || 
+        (descriptor instanceof PackageDescriptor) || 
+        (descriptor instanceof ServiceDescriptor);
+    }
+       
     /**
-     * Look up a descriptor by name, relative to some other descriptor.
+     * Look up a type descriptor by name, relative to some other descriptor.
      * The name may be fully-qualified (with a leading '.'),
      * partially-qualified, or unqualified.  C++-like name lookup semantics
      * are used to search for the matching descriptor.
      */
     GenericDescriptor lookupSymbol(final String name,
-                                   final GenericDescriptor relativeTo)
+                                   final GenericDescriptor relativeTo,
+                                   final DescriptorPool.SearchFilter filter)
                             throws DescriptorValidationException {
       // TODO(kenton):  This could be optimized in a number of ways.
 
       GenericDescriptor result;
       if (name.startsWith(".")) {
         // Fully-qualified name.
-        result = findSymbol(name.substring(1));
+        result = findSymbol(name.substring(1), filter);
       } else {
         // If "name" is a compound identifier, we want to search for the
         // first component of it, then search within it for the rest.
+        // If name is something like "Foo.Bar.baz", and symbols named "Foo" are
+        // defined in multiple parent scopes, we only want to find "Bar.baz" in
+        // the innermost one.  E.g., the following should produce an error:
+        //   message Bar { message Baz {} }
+        //   message Foo {
+        //     message Bar {
+        //     }
+        //     optional Bar.Baz baz = 1;
+        //   }
+        // So, we look for just "Foo" first, then look for "Bar.baz" within it
+        // if found.
         final int firstPartLength = name.indexOf('.');
         final String firstPart;
         if (firstPartLength == -1) {
@@ -1670,14 +1752,15 @@ public final class Descriptors {
           // Chop off the last component of the scope.
           final int dotpos = scopeToTry.lastIndexOf(".");
           if (dotpos == -1) {
-            result = findSymbol(name);
+            result = findSymbol(name, filter);
             break;
           } else {
             scopeToTry.setLength(dotpos + 1);
 
-            // Append firstPart and try to find.
+            // Append firstPart and try to find
             scopeToTry.append(firstPart);
-            result = findSymbol(scopeToTry.toString());
+            result = findSymbol(scopeToTry.toString(), 
+                DescriptorPool.SearchFilter.AGGREGATES_ONLY);
 
             if (result != null) {
               if (firstPartLength != -1) {
@@ -1686,7 +1769,7 @@ public final class Descriptors {
                 // searching parent scopes.
                 scopeToTry.setLength(dotpos + 1);
                 scopeToTry.append(name);
-                result = findSymbol(scopeToTry.toString());
+                result = findSymbol(scopeToTry.toString(), filter);
               }
               break;
             }
@@ -1817,7 +1900,7 @@ public final class Descriptors {
 
     /**
      * Adds a field to the fieldsByNumber table.  Throws an exception if a
-     * field with hte same containing type and number already exists.
+     * field with the same containing type and number already exists.
      */
     void addFieldByNumber(final FieldDescriptor field)
                    throws DescriptorValidationException {
diff --git a/java/src/main/java/com/google/protobuf/DynamicMessage.java b/java/src/main/java/com/google/protobuf/DynamicMessage.java
index c106b662..c0c9fc94 100644
--- a/java/src/main/java/com/google/protobuf/DynamicMessage.java
+++ b/java/src/main/java/com/google/protobuf/DynamicMessage.java
@@ -35,6 +35,7 @@ import com.google.protobuf.Descriptors.FieldDescriptor;
 
 import java.io.InputStream;
 import java.io.IOException;
+import java.util.Collections;
 import java.util.Map;
 
 /**
@@ -160,7 +161,9 @@ public final class DynamicMessage extends AbstractMessage {
     verifyContainingType(field);
     Object result = fields.getField(field);
     if (result == null) {
-      if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
+      if (field.isRepeated()) {
+        result = Collections.emptyList();
+      } else if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
         result = getDefaultInstance(field.getMessageType());
       } else {
         result = field.getDefaultValue();
@@ -198,10 +201,12 @@ public final class DynamicMessage extends AbstractMessage {
     return fields.isInitialized();
   }
 
+  @Override
   public boolean isInitialized() {
     return isInitialized(type, fields);
   }
 
+  @Override
   public void writeTo(CodedOutputStream output) throws IOException {
     if (type.getOptions().getMessageSetWireFormat()) {
       fields.writeMessageSetTo(output);
@@ -212,6 +217,7 @@ public final class DynamicMessage extends AbstractMessage {
     }
   }
 
+  @Override
   public int getSerializedSize() {
     int size = memoizedSize;
     if (size != -1) return size;
@@ -236,6 +242,26 @@ public final class DynamicMessage extends AbstractMessage {
     return newBuilderForType().mergeFrom(this);
   }
 
+  public Parser<DynamicMessage> getParserForType() {
+    return new AbstractParser<DynamicMessage>() {
+      public DynamicMessage parsePartialFrom(
+          CodedInputStream input,
+          ExtensionRegistryLite extensionRegistry)
+          throws InvalidProtocolBufferException {
+        Builder builder = newBuilder(type);
+        try {
+          builder.mergeFrom(input, extensionRegistry);
+        } catch (InvalidProtocolBufferException e) {
+          throw e.setUnfinishedMessage(builder.buildPartial());
+        } catch (IOException e) {
+          throw new InvalidProtocolBufferException(e.getMessage())
+              .setUnfinishedMessage(builder.buildPartial());
+        }
+        return builder.buildPartial();
+      }
+    };
+  }
+
   /** Verifies that the field is a field of this message. */
   private void verifyContainingType(FieldDescriptor field) {
     if (field.getContainingType() != type) {
@@ -264,14 +290,18 @@ public final class DynamicMessage extends AbstractMessage {
     // ---------------------------------------------------------------
     // Implementation of Message.Builder interface.
 
+    @Override
     public Builder clear() {
-      if (fields == null) {
-        throw new IllegalStateException("Cannot call clear() after build().");
+      if (fields.isImmutable()) {
+        fields = FieldSet.newFieldSet();
+      } else {
+        fields.clear();
       }
-      fields.clear();
+      unknownFields = UnknownFieldSet.getDefaultInstance();
       return this;
     }
 
+    @Override
     public Builder mergeFrom(Message other) {
       if (other instanceof DynamicMessage) {
         // This should be somewhat faster than calling super.mergeFrom().
@@ -280,6 +310,7 @@ public final class DynamicMessage extends AbstractMessage {
           throw new IllegalArgumentException(
             "mergeFrom(Message) can only merge messages of the same type.");
         }
+        ensureIsMutable();
         fields.mergeFrom(otherDynamicMessage.fields);
         mergeUnknownFields(otherDynamicMessage.unknownFields);
         return this;
@@ -289,8 +320,7 @@ public final class DynamicMessage extends AbstractMessage {
     }
 
     public DynamicMessage build() {
-      // If fields == null, we'll throw an appropriate exception later.
-      if (fields != null && !isInitialized()) {
+      if (!isInitialized()) {
         throw newUninitializedMessageException(
           new DynamicMessage(type, fields, unknownFields));
       }
@@ -312,21 +342,17 @@ public final class DynamicMessage extends AbstractMessage {
     }
 
     public DynamicMessage buildPartial() {
-      if (fields == null) {
-        throw new IllegalStateException(
-            "build() has already been called on this Builder.");
-      }
       fields.makeImmutable();
       DynamicMessage result =
         new DynamicMessage(type, fields, unknownFields);
-      fields = null;
-      unknownFields = null;
       return result;
     }
 
+    @Override
     public Builder clone() {
       Builder result = new Builder(type);
       result.fields.mergeFrom(fields);
+      result.mergeUnknownFields(unknownFields);
       return result;
     }
 
@@ -377,12 +403,14 @@ public final class DynamicMessage extends AbstractMessage {
 
     public Builder setField(FieldDescriptor field, Object value) {
       verifyContainingType(field);
+      ensureIsMutable();
       fields.setField(field, value);
       return this;
     }
 
     public Builder clearField(FieldDescriptor field) {
       verifyContainingType(field);
+      ensureIsMutable();
       fields.clearField(field);
       return this;
     }
@@ -400,12 +428,14 @@ public final class DynamicMessage extends AbstractMessage {
     public Builder setRepeatedField(FieldDescriptor field,
                                     int index, Object value) {
       verifyContainingType(field);
+      ensureIsMutable();
       fields.setRepeatedField(field, index, value);
       return this;
     }
 
     public Builder addRepeatedField(FieldDescriptor field, Object value) {
       verifyContainingType(field);
+      ensureIsMutable();
       fields.addRepeatedField(field, value);
       return this;
     }
@@ -419,6 +449,7 @@ public final class DynamicMessage extends AbstractMessage {
       return this;
     }
 
+    @Override
     public Builder mergeUnknownFields(UnknownFieldSet unknownFields) {
       this.unknownFields =
         UnknownFieldSet.newBuilder(this.unknownFields)
@@ -434,5 +465,18 @@ public final class DynamicMessage extends AbstractMessage {
           "FieldDescriptor does not match message type.");
       }
     }
+
+    private void ensureIsMutable() {
+      if (fields.isImmutable()) {
+        fields = fields.clone();
+      }
+    }
+
+    @Override
+    public com.google.protobuf.Message.Builder getFieldBuilder(FieldDescriptor field) {
+      // TODO(xiangl): need implementation for dynamic message
+      throw new UnsupportedOperationException(
+        "getFieldBuilder() called on a dynamic message type.");
+    }
   }
 }
diff --git a/java/src/main/java/com/google/protobuf/ExtensionRegistryLite.java b/java/src/main/java/com/google/protobuf/ExtensionRegistryLite.java
index d5288dd8..1e1289d0 100644
--- a/java/src/main/java/com/google/protobuf/ExtensionRegistryLite.java
+++ b/java/src/main/java/com/google/protobuf/ExtensionRegistryLite.java
@@ -43,7 +43,7 @@ import java.util.Map;
  * make sense to mix the two, since if you have any regular types in your
  * program, you then require the full runtime and lose all the benefits of
  * the lite runtime, so you might as well make all your types be regular types.
- * However, in some cases (e.g. when depending on multiple third-patry libraries
+ * However, in some cases (e.g. when depending on multiple third-party libraries
  * where one uses lite types and one uses regular), you may find yourself
  * wanting to mix the two.  In this case things get more complicated.
  * <p>
@@ -71,6 +71,22 @@ import java.util.Map;
  * @author kenton@google.com Kenton Varda
  */
 public class ExtensionRegistryLite {
+
+  // Set true to parse MessageSets eagerly, i.e. to disable lazy parsing.
+  //
+  // TODO(xiangl): Now we use a global flag to control whether to enable lazy
+  // parsing for MessageSet, which may be too crude for some applications.
+  // Need to support this feature at a finer granularity.
+  private static volatile boolean eagerlyParseMessageSets = false;
+
+  public static boolean isEagerlyParseMessageSets() {
+    return eagerlyParseMessageSets;
+  }
+
+  public static void setEagerlyParseMessageSets(boolean isEagerlyParse) {
+    eagerlyParseMessageSets = isEagerlyParse;
+  }
+
   /** Construct a new, empty instance. */
   public static ExtensionRegistryLite newInstance() {
     return new ExtensionRegistryLite();
diff --git a/java/src/main/java/com/google/protobuf/FieldSet.java b/java/src/main/java/com/google/protobuf/FieldSet.java
index a85dbaa6..2663694f 100644
--- a/java/src/main/java/com/google/protobuf/FieldSet.java
+++ b/java/src/main/java/com/google/protobuf/FieldSet.java
@@ -30,12 +30,14 @@
 
 package com.google.protobuf;
 
+import com.google.protobuf.LazyField.LazyIterator;
+
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
-import java.io.IOException;
 
 /**
  * A class which represents an arbitrary set of fields of some message type.
@@ -68,6 +70,7 @@ final class FieldSet<FieldDescriptorType extends
 
   private final SmallSortedMap<FieldDescriptorType, Object> fields;
   private boolean isImmutable;
+  private boolean hasLazyField = false;
 
   /** Construct a new FieldSet. */
   private FieldSet() {
@@ -95,7 +98,7 @@ final class FieldSet<FieldDescriptorType extends
       FieldSet<T> emptySet() {
     return DEFAULT_INSTANCE;
   }
-  @SuppressWarnings("unchecked")
+  @SuppressWarnings("rawtypes")
   private static final FieldSet DEFAULT_INSTANCE = new FieldSet(true);
 
   /** Make this FieldSet immutable from this point forward. */
@@ -109,7 +112,7 @@ final class FieldSet<FieldDescriptorType extends
   }
 
   /**
-   * Retuns whether the FieldSet is immutable. This is true if it is the
+   * Returns whether the FieldSet is immutable. This is true if it is the
    * {@link #emptySet} or if {@link #makeImmutable} were called.
    *
    * @return whether the FieldSet is immutable.
@@ -139,6 +142,7 @@ final class FieldSet<FieldDescriptorType extends
       FieldDescriptorType descriptor = entry.getKey();
       clone.setField(descriptor, entry.getValue());
     }
+    clone.hasLazyField = hasLazyField;
     return clone;
   }
 
@@ -147,21 +151,52 @@ final class FieldSet<FieldDescriptorType extends
   /** See {@link Message.Builder#clear()}. */
   public void clear() {
     fields.clear();
+    hasLazyField = false;
   }
 
   /**
    * Get a simple map containing all the fields.
    */
   public Map<FieldDescriptorType, Object> getAllFields() {
+    if (hasLazyField) {
+      SmallSortedMap<FieldDescriptorType, Object> result =
+          SmallSortedMap.newFieldMap(16);
+      for (int i = 0; i < fields.getNumArrayEntries(); i++) {
+        cloneFieldEntry(result, fields.getArrayEntryAt(i));
+      }
+      for (Map.Entry<FieldDescriptorType, Object> entry :
+          fields.getOverflowEntries()) {
+        cloneFieldEntry(result, entry);
+      }
+      if (fields.isImmutable()) {
+        result.makeImmutable();
+      }
+      return result;
+    }
     return fields.isImmutable() ? fields : Collections.unmodifiableMap(fields);
   }
 
+  private void cloneFieldEntry(Map<FieldDescriptorType, Object> map,
+      Map.Entry<FieldDescriptorType, Object> entry) {
+    FieldDescriptorType key = entry.getKey();
+    Object value = entry.getValue();
+    if (value instanceof LazyField) {
+      map.put(key, ((LazyField) value).getValue());
+    } else {
+      map.put(key, value);
+    }
+  }
+
   /**
    * Get an iterator to the field map. This iterator should not be leaked out
-   * of the protobuf library as it is not protected from mutation when
-   * fields is not immutable.
+   * of the protobuf library as it is not protected from mutation when fields
+   * is not immutable.
    */
   public Iterator<Map.Entry<FieldDescriptorType, Object>> iterator() {
+    if (hasLazyField) {
+      return new LazyIterator<FieldDescriptorType>(
+          fields.entrySet().iterator());
+    }
     return fields.entrySet().iterator();
   }
 
@@ -185,14 +220,18 @@ final class FieldSet<FieldDescriptorType extends
    * to the caller to fetch the field's default value.
    */
   public Object getField(final FieldDescriptorType descriptor) {
-    return fields.get(descriptor);
+    Object o = fields.get(descriptor);
+    if (o instanceof LazyField) {
+      return ((LazyField) o).getValue();
+    }
+    return o;
   }
 
   /**
    * Useful for implementing
    * {@link Message.Builder#setField(Descriptors.FieldDescriptor,Object)}.
    */
-  @SuppressWarnings("unchecked")
+  @SuppressWarnings({"unchecked", "rawtypes"})
   public void setField(final FieldDescriptorType descriptor,
                        Object value) {
     if (descriptor.isRepeated()) {
@@ -204,7 +243,7 @@ final class FieldSet<FieldDescriptorType extends
       // Wrap the contents in a new list so that the caller cannot change
       // the list's contents after setting it.
       final List newList = new ArrayList();
-      newList.addAll((List)value);
+      newList.addAll((List) value);
       for (final Object element : newList) {
         verifyType(descriptor.getLiteType(), element);
       }
@@ -213,6 +252,9 @@ final class FieldSet<FieldDescriptorType extends
       verifyType(descriptor.getLiteType(), value);
     }
 
+    if (value instanceof LazyField) {
+      hasLazyField = true;
+    }
     fields.put(descriptor, value);
   }
 
@@ -222,6 +264,9 @@ final class FieldSet<FieldDescriptorType extends
    */
   public void clearField(final FieldDescriptorType descriptor) {
     fields.remove(descriptor);
+    if (fields.isEmpty()) {
+      hasLazyField = false;
+    }
   }
 
   /**
@@ -234,7 +279,7 @@ final class FieldSet<FieldDescriptorType extends
         "getRepeatedField() can only be called on repeated fields.");
     }
 
-    final Object value = fields.get(descriptor);
+    final Object value = getField(descriptor);
     if (value == null) {
       return 0;
     } else {
@@ -253,7 +298,7 @@ final class FieldSet<FieldDescriptorType extends
         "getRepeatedField() can only be called on repeated fields.");
     }
 
-    final Object value = fields.get(descriptor);
+    final Object value = getField(descriptor);
 
     if (value == null) {
       throw new IndexOutOfBoundsException();
@@ -275,13 +320,13 @@ final class FieldSet<FieldDescriptorType extends
         "getRepeatedField() can only be called on repeated fields.");
     }
 
-    final Object list = fields.get(descriptor);
+    final Object list = getField(descriptor);
     if (list == null) {
       throw new IndexOutOfBoundsException();
     }
 
     verifyType(descriptor.getLiteType(), value);
-    ((List) list).set(index, value);
+    ((List<Object>) list).set(index, value);
   }
 
   /**
@@ -298,13 +343,13 @@ final class FieldSet<FieldDescriptorType extends
 
     verifyType(descriptor.getLiteType(), value);
 
-    final Object existingValue = fields.get(descriptor);
-    List list;
+    final Object existingValue = getField(descriptor);
+    List<Object> list;
     if (existingValue == null) {
-      list = new ArrayList();
+      list = new ArrayList<Object>();
       fields.put(descriptor, list);
     } else {
-      list = (List) existingValue;
+      list = (List<Object>) existingValue;
     }
 
     list.add(value);
@@ -338,7 +383,8 @@ final class FieldSet<FieldDescriptorType extends
         break;
       case MESSAGE:
         // TODO(kenton):  Caller must do type checking here, I guess.
-        isValid = value instanceof MessageLite;
+        isValid =
+            (value instanceof MessageLite) || (value instanceof LazyField);
         break;
     }
 
@@ -392,8 +438,16 @@ final class FieldSet<FieldDescriptorType extends
           }
         }
       } else {
-        if (!((MessageLite) entry.getValue()).isInitialized()) {
-          return false;
+        Object value = entry.getValue();
+        if (value instanceof MessageLite) {
+          if (!((MessageLite) value).isInitialized()) {
+            return false;
+          }
+        } else if (value instanceof LazyField) {
+          return true;
+        } else {
+          throw new IllegalArgumentException(
+              "Wrong object type used with protocol message reflection.");
         }
       }
     }
@@ -416,7 +470,8 @@ final class FieldSet<FieldDescriptorType extends
   }
 
   /**
-   * Like {@link #mergeFrom(Message)}, but merges from another {@link FieldSet}.
+   * Like {@link Message.Builder#mergeFrom(Message)}, but merges from another 
+   * {@link FieldSet}.
    */
   public void mergeFrom(final FieldSet<FieldDescriptorType> other) {
     for (int i = 0; i < other.fields.getNumArrayEntries(); i++) {
@@ -428,14 +483,17 @@ final class FieldSet<FieldDescriptorType extends
     }
   }
 
-  @SuppressWarnings("unchecked")
+  @SuppressWarnings({"unchecked", "rawtypes"})
   private void mergeFromField(
       final Map.Entry<FieldDescriptorType, Object> entry) {
     final FieldDescriptorType descriptor = entry.getKey();
-    final Object otherValue = entry.getValue();
+    Object otherValue = entry.getValue();
+    if (otherValue instanceof LazyField) {
+      otherValue = ((LazyField) otherValue).getValue();
+    }
 
     if (descriptor.isRepeated()) {
-      Object value = fields.get(descriptor);
+      Object value = getField(descriptor);
       if (value == null) {
         // Our list is empty, but we still need to make a defensive copy of
         // the other list since we don't know if the other FieldSet is still
@@ -446,7 +504,7 @@ final class FieldSet<FieldDescriptorType extends
         ((List) value).addAll((List) otherValue);
       }
     } else if (descriptor.getLiteJavaType() == WireFormat.JavaType.MESSAGE) {
-      Object value = fields.get(descriptor);
+      Object value = getField(descriptor);
       if (value == null) {
         fields.put(descriptor, otherValue);
       } else {
@@ -457,7 +515,6 @@ final class FieldSet<FieldDescriptorType extends
                 ((MessageLite) value).toBuilder(), (MessageLite) otherValue)
             .build());
       }
-
     } else {
       fields.put(descriptor, otherValue);
     }
@@ -646,7 +703,11 @@ final class FieldSet<FieldDescriptorType extends
         }
       }
     } else {
-      writeElement(output, type, number, value);
+      if (value instanceof LazyField) {
+        writeElement(output, type, number, ((LazyField) value).getValue());
+      } else {
+        writeElement(output, type, number, value);
+      }
     }
   }
 
@@ -686,12 +747,18 @@ final class FieldSet<FieldDescriptorType extends
   private int getMessageSetSerializedSize(
       final Map.Entry<FieldDescriptorType, Object> entry) {
     final FieldDescriptorType descriptor = entry.getKey();
-    if (descriptor.getLiteJavaType() == WireFormat.JavaType.MESSAGE &&
-        !descriptor.isRepeated() && !descriptor.isPacked()) {
-      return CodedOutputStream.computeMessageSetExtensionSize(
-          entry.getKey().getNumber(), (MessageLite) entry.getValue());
+    Object value = entry.getValue();
+    if (descriptor.getLiteJavaType() == WireFormat.JavaType.MESSAGE
+        && !descriptor.isRepeated() && !descriptor.isPacked()) {
+      if (value instanceof LazyField) {
+        return CodedOutputStream.computeLazyFieldMessageSetExtensionSize(
+            entry.getKey().getNumber(), (LazyField) value);
+      } else {
+        return CodedOutputStream.computeMessageSetExtensionSize(
+            entry.getKey().getNumber(), (MessageLite) value);
+      }
     } else {
-      return computeFieldSize(descriptor, entry.getValue());
+      return computeFieldSize(descriptor, value);
     }
   }
 
@@ -741,7 +808,6 @@ final class FieldSet<FieldDescriptorType extends
       case BOOL    : return CodedOutputStream.computeBoolSizeNoTag    ((Boolean    )value);
       case STRING  : return CodedOutputStream.computeStringSizeNoTag  ((String     )value);
       case GROUP   : return CodedOutputStream.computeGroupSizeNoTag   ((MessageLite)value);
-      case MESSAGE : return CodedOutputStream.computeMessageSizeNoTag ((MessageLite)value);
       case BYTES   : return CodedOutputStream.computeBytesSizeNoTag   ((ByteString )value);
       case UINT32  : return CodedOutputStream.computeUInt32SizeNoTag  ((Integer    )value);
       case SFIXED32: return CodedOutputStream.computeSFixed32SizeNoTag((Integer    )value);
@@ -749,6 +815,13 @@ final class FieldSet<FieldDescriptorType extends
       case SINT32  : return CodedOutputStream.computeSInt32SizeNoTag  ((Integer    )value);
       case SINT64  : return CodedOutputStream.computeSInt64SizeNoTag  ((Long       )value);
 
+      case MESSAGE:
+        if (value instanceof LazyField) {
+          return CodedOutputStream.computeLazyFieldSizeNoTag((LazyField) value);
+        } else {
+          return CodedOutputStream.computeMessageSizeNoTag((MessageLite) value);
+        }
+
       case ENUM:
         return CodedOutputStream.computeEnumSizeNoTag(
             ((Internal.EnumLite) value).getNumber());
diff --git a/java/src/main/java/com/google/protobuf/GeneratedMessage.java b/java/src/main/java/com/google/protobuf/GeneratedMessage.java
index b5eaded5..0c15ca84 100644
--- a/java/src/main/java/com/google/protobuf/GeneratedMessage.java
+++ b/java/src/main/java/com/google/protobuf/GeneratedMessage.java
@@ -58,8 +58,6 @@ public abstract class GeneratedMessage extends AbstractMessage
     implements Serializable {
   private static final long serialVersionUID = 1L;
 
-  private final UnknownFieldSet unknownFields;
-
   /**
    * For testing. Allows a test to disable the optimization that avoids using
    * field builders for nested messages until they are requested. By disabling
@@ -68,11 +66,14 @@ public abstract class GeneratedMessage extends AbstractMessage
   protected static boolean alwaysUseFieldBuilders = false;
 
   protected GeneratedMessage() {
-    this.unknownFields = UnknownFieldSet.getDefaultInstance();
   }
 
   protected GeneratedMessage(Builder<?> builder) {
-    this.unknownFields = builder.getUnknownFields();
+  }
+
+  public Parser<? extends Message> getParserForType() {
+    throw new UnsupportedOperationException(
+        "This is supposed to be overridden by subclasses.");
   }
 
  /**
@@ -175,8 +176,28 @@ public abstract class GeneratedMessage extends AbstractMessage
   }
 
   //@Override (Java 1.6 override semantics, but we must support 1.5)
-  public final UnknownFieldSet getUnknownFields() {
-    return unknownFields;
+  public UnknownFieldSet getUnknownFields() {
+    throw new UnsupportedOperationException(
+        "This is supposed to be overridden by subclasses.");
+  }
+
+  /**
+   * Called by subclasses to parse an unknown field.
+   * @return {@code true} unless the tag is an end-group tag.
+   */
+  protected boolean parseUnknownField(
+      CodedInputStream input,
+      UnknownFieldSet.Builder unknownFields,
+      ExtensionRegistryLite extensionRegistry,
+      int tag) throws IOException {
+    return unknownFields.mergeFieldFrom(tag, input);
+  }
+
+  /**
+   * Used by parsing constructors in generated classes.
+   */
+  protected void makeExtensionsImmutable() {
+    // Noop for messages without extensions.
   }
 
   protected abstract Message.Builder newBuilderForType(BuilderParent parent);
@@ -319,6 +340,11 @@ public abstract class GeneratedMessage extends AbstractMessage
     }
 
     //@Override (Java 1.6 override semantics, but we must support 1.5)
+    public Message.Builder getFieldBuilder(final FieldDescriptor field) {
+      return internalGetFieldAccessorTable().getField(field).getBuilder(this);
+    }
+
+    //@Override (Java 1.6 override semantics, but we must support 1.5)
     public boolean hasField(final FieldDescriptor field) {
       return internalGetFieldAccessorTable().getField(field).has(this);
     }
@@ -626,6 +652,25 @@ public abstract class GeneratedMessage extends AbstractMessage
       return super.isInitialized() && extensionsAreInitialized();
     }
 
+    @Override
+    protected boolean parseUnknownField(
+        CodedInputStream input,
+        UnknownFieldSet.Builder unknownFields,
+        ExtensionRegistryLite extensionRegistry,
+        int tag) throws IOException {
+      return AbstractMessage.Builder.mergeFieldFrom(
+        input, unknownFields, extensionRegistry, getDescriptorForType(),
+        null, extensions, tag);
+    }
+
+    /**
+     * Used by parsing constructors in generated classes.
+     */
+    @Override
+    protected void makeExtensionsImmutable() {
+      extensions.makeImmutable();
+    }
+
     /**
      * Used by subclasses to serialize extensions.  Extension ranges may be
      * interleaved with field numbers, but we must write them in canonical
@@ -655,9 +700,21 @@ public abstract class GeneratedMessage extends AbstractMessage
           if (messageSetWireFormat && descriptor.getLiteJavaType() ==
                   WireFormat.JavaType.MESSAGE &&
               !descriptor.isRepeated()) {
-            output.writeMessageSetExtension(descriptor.getNumber(),
-                                            (Message) next.getValue());
+            if (next instanceof LazyField.LazyEntry<?>) {
+              output.writeRawMessageSetExtension(descriptor.getNumber(),
+                  ((LazyField.LazyEntry<?>) next).getField().toByteString());
+            } else {
+              output.writeMessageSetExtension(descriptor.getNumber(),
+                                              (Message) next.getValue());
+            }
           } else {
+            // TODO(xiangl): Take care with the following code; it may cause
+            // problems when we use LazyField for normal fields/extensions.
+            // An optional field can be duplicated at the end of the
+            // serialized bytes, which makes the serialized size change
+            // after the lazy field is parsed. So when we use LazyField
+            // globally, we need to change the following write method to
+            // write the cached bytes directly rather than the parsed message.
             FieldSet.writeField(descriptor, next.getValue(), output);
           }
           if (iter.hasNext()) {
@@ -974,7 +1031,8 @@ public abstract class GeneratedMessage extends AbstractMessage
         final ExtensionRegistryLite extensionRegistry,
         final int tag) throws IOException {
       return AbstractMessage.Builder.mergeFieldFrom(
-        input, unknownFields, extensionRegistry, this, tag);
+        input, unknownFields, extensionRegistry, getDescriptorForType(),
+        this, null, tag);
     }
 
     // ---------------------------------------------------------------
@@ -1405,39 +1463,72 @@ public abstract class GeneratedMessage extends AbstractMessage
         final String[] camelCaseNames,
         final Class<? extends GeneratedMessage> messageClass,
         final Class<? extends Builder> builderClass) {
+      this(descriptor, camelCaseNames);
+      ensureFieldAccessorsInitialized(messageClass, builderClass);
+    }
+
+    /**
+     * Construct a FieldAccessorTable for a particular message class without
+     * initializing FieldAccessors.
+     */
+    public FieldAccessorTable(
+        final Descriptor descriptor,
+        final String[] camelCaseNames) {
       this.descriptor = descriptor;
+      this.camelCaseNames = camelCaseNames;
       fields = new FieldAccessor[descriptor.getFields().size()];
+      initialized = false;
+    }
 
-      for (int i = 0; i < fields.length; i++) {
-        final FieldDescriptor field = descriptor.getFields().get(i);
-        if (field.isRepeated()) {
-          if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
-            fields[i] = new RepeatedMessageFieldAccessor(
-              field, camelCaseNames[i], messageClass, builderClass);
-          } else if (field.getJavaType() == FieldDescriptor.JavaType.ENUM) {
-            fields[i] = new RepeatedEnumFieldAccessor(
-              field, camelCaseNames[i], messageClass, builderClass);
-          } else {
-            fields[i] = new RepeatedFieldAccessor(
-              field, camelCaseNames[i], messageClass, builderClass);
-          }
-        } else {
-          if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
-            fields[i] = new SingularMessageFieldAccessor(
-              field, camelCaseNames[i], messageClass, builderClass);
-          } else if (field.getJavaType() == FieldDescriptor.JavaType.ENUM) {
-            fields[i] = new SingularEnumFieldAccessor(
-              field, camelCaseNames[i], messageClass, builderClass);
+    /**
+     * Ensures the field accessors are initialized. This method is thread-safe.
+     *
+     * @param messageClass   The message type.
+     * @param builderClass   The builder type.
+     * @return this
+     */
+    public FieldAccessorTable ensureFieldAccessorsInitialized(
+        Class<? extends GeneratedMessage> messageClass,
+        Class<? extends Builder> builderClass) {
+      if (initialized) { return this; }
+      synchronized (this) {
+        if (initialized) { return this; }
+        for (int i = 0; i < fields.length; i++) {
+          FieldDescriptor field = descriptor.getFields().get(i);
+          if (field.isRepeated()) {
+            if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
+              fields[i] = new RepeatedMessageFieldAccessor(
+                  field, camelCaseNames[i], messageClass, builderClass);
+            } else if (field.getJavaType() == FieldDescriptor.JavaType.ENUM) {
+              fields[i] = new RepeatedEnumFieldAccessor(
+                  field, camelCaseNames[i], messageClass, builderClass);
+            } else {
+              fields[i] = new RepeatedFieldAccessor(
+                  field, camelCaseNames[i], messageClass, builderClass);
+            }
           } else {
-            fields[i] = new SingularFieldAccessor(
-              field, camelCaseNames[i], messageClass, builderClass);
+            if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
+              fields[i] = new SingularMessageFieldAccessor(
+                  field, camelCaseNames[i], messageClass, builderClass);
+            } else if (field.getJavaType() == FieldDescriptor.JavaType.ENUM) {
+              fields[i] = new SingularEnumFieldAccessor(
+                  field, camelCaseNames[i], messageClass, builderClass);
+            } else {
+              fields[i] = new SingularFieldAccessor(
+                  field, camelCaseNames[i], messageClass, builderClass);
+            }
           }
         }
+        initialized = true;
+        camelCaseNames = null;
+        return this;
       }
     }
 
     private final Descriptor descriptor;
     private final FieldAccessor[] fields;
+    private String[] camelCaseNames;
+    private volatile boolean initialized;
 
     /** Get the FieldAccessor for a particular field. */
     private FieldAccessor getField(final FieldDescriptor field) {
@@ -1472,6 +1563,7 @@ public abstract class GeneratedMessage extends AbstractMessage
       int getRepeatedCount(GeneratedMessage.Builder builder);
       void clear(Builder builder);
       Message.Builder newBuilder();
+      Message.Builder getBuilder(GeneratedMessage.Builder builder);
     }
 
     // ---------------------------------------------------------------
@@ -1551,6 +1643,10 @@ public abstract class GeneratedMessage extends AbstractMessage
         throw new UnsupportedOperationException(
           "newBuilderForField() called on a non-Message type.");
       }
+      public Message.Builder getBuilder(GeneratedMessage.Builder builder) {
+        throw new UnsupportedOperationException(
+          "getFieldBuilder() called on a non-Message type.");
+      }
     }
 
     private static class RepeatedFieldAccessor implements FieldAccessor {
@@ -1573,8 +1669,6 @@ public abstract class GeneratedMessage extends AbstractMessage
                                    "get" + camelCaseName + "List");
         getMethodBuilder = getMethodOrDie(builderClass,
                                    "get" + camelCaseName + "List");
-
-
         getRepeatedMethod =
             getMethodOrDie(messageClass, "get" + camelCaseName, Integer.TYPE);
         getRepeatedMethodBuilder =
@@ -1625,11 +1719,11 @@ public abstract class GeneratedMessage extends AbstractMessage
       }
       public boolean has(final GeneratedMessage message) {
         throw new UnsupportedOperationException(
-          "hasField() called on a singular field.");
+          "hasField() called on a repeated field.");
       }
       public boolean has(GeneratedMessage.Builder builder) {
         throw new UnsupportedOperationException(
-          "hasField() called on a singular field.");
+          "hasField() called on a repeated field.");
       }
       public int getRepeatedCount(final GeneratedMessage message) {
         return (Integer) invokeOrDie(getCountMethod, message);
@@ -1644,6 +1738,10 @@ public abstract class GeneratedMessage extends AbstractMessage
         throw new UnsupportedOperationException(
           "newBuilderForField() called on a non-Message type.");
       }
+      public Message.Builder getBuilder(GeneratedMessage.Builder builder) {
+        throw new UnsupportedOperationException(
+          "getFieldBuilder() called on a non-Message type.");
+      }
     }
 
     // ---------------------------------------------------------------
@@ -1753,9 +1851,12 @@ public abstract class GeneratedMessage extends AbstractMessage
         super(descriptor, camelCaseName, messageClass, builderClass);
 
         newBuilderMethod = getMethodOrDie(type, "newBuilder");
+        getBuilderMethodBuilder =
+            getMethodOrDie(builderClass, "get" + camelCaseName + "Builder");
       }
 
       private final Method newBuilderMethod;
+      private final Method getBuilderMethodBuilder;
 
       private Object coerceType(final Object value) {
         if (type.isInstance(value)) {
@@ -1766,7 +1867,7 @@ public abstract class GeneratedMessage extends AbstractMessage
           // DynamicMessage -- we should accept it.  In this case we can make
           // a copy of the message.
           return ((Message.Builder) invokeOrDie(newBuilderMethod, null))
-                  .mergeFrom((Message) value).build();
+                  .mergeFrom((Message) value).buildPartial();
         }
       }
 
@@ -1778,6 +1879,10 @@ public abstract class GeneratedMessage extends AbstractMessage
       public Message.Builder newBuilder() {
         return (Message.Builder) invokeOrDie(newBuilderMethod, null);
       }
+      @Override
+      public Message.Builder getBuilder(GeneratedMessage.Builder builder) {
+        return (Message.Builder) invokeOrDie(getBuilderMethodBuilder, builder);
+      }
     }
 
     private static final class RepeatedMessageFieldAccessor
@@ -1825,7 +1930,7 @@ public abstract class GeneratedMessage extends AbstractMessage
   /**
    * Replaces this object in the output stream with a serialized form.
    * Part of Java's serialization magic.  Generated sub-classes must override
-   * this method by calling <code>return super.writeReplace();</code>
+   * this method by calling {@code return super.writeReplace();}
    * @return a SerializedForm of this message
    */
   protected Object writeReplace() throws ObjectStreamException {
diff --git a/java/src/main/java/com/google/protobuf/GeneratedMessageLite.java b/java/src/main/java/com/google/protobuf/GeneratedMessageLite.java
index 1813e9b3..437e3412 100644
--- a/java/src/main/java/com/google/protobuf/GeneratedMessageLite.java
+++ b/java/src/main/java/com/google/protobuf/GeneratedMessageLite.java
@@ -55,6 +55,29 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite
   protected GeneratedMessageLite(Builder builder) {
   }
 
+  public Parser<? extends MessageLite> getParserForType() {
+    throw new UnsupportedOperationException(
+        "This is supposed to be overridden by subclasses.");
+  }
+
+  /**
+   * Called by subclasses to parse an unknown field.
+   * @return {@code true} unless the tag is an end-group tag.
+   */
+  protected boolean parseUnknownField(
+      CodedInputStream input,
+      ExtensionRegistryLite extensionRegistry,
+      int tag) throws IOException {
+    return input.skipField(tag);
+  }
+
+  /**
+   * Used by parsing constructors in generated classes.
+   */
+  protected void makeExtensionsImmutable() {
+    // Noop for messages without extensions.
+  }
+
   @SuppressWarnings("unchecked")
   public abstract static class Builder<MessageType extends GeneratedMessageLite,
                                        BuilderType extends Builder>
@@ -86,9 +109,9 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite
      * @return {@code true} unless the tag is an end-group tag.
      */
     protected boolean parseUnknownField(
-        final CodedInputStream input,
-        final ExtensionRegistryLite extensionRegistry,
-        final int tag) throws IOException {
+        CodedInputStream input,
+        ExtensionRegistryLite extensionRegistry,
+        int tag) throws IOException {
       return input.skipField(tag);
     }
   }
@@ -194,6 +217,31 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite
     }
 
     /**
+     * Called by subclasses to parse an unknown field or an extension.
+     * @return {@code true} unless the tag is an end-group tag.
+     */
+    @Override
+    protected boolean parseUnknownField(
+        CodedInputStream input,
+        ExtensionRegistryLite extensionRegistry,
+        int tag) throws IOException {
+      return GeneratedMessageLite.parseUnknownField(
+          extensions,
+          getDefaultInstanceForType(),
+          input,
+          extensionRegistry,
+          tag);
+    }
+
+    /**
+     * Used by parsing constructors in generated classes.
+     */
+    @Override
+    protected void makeExtensionsImmutable() {
+      extensions.makeImmutable();
+    }
+
+    /**
      * Used by subclasses to serialize extensions.  Extension ranges may be
      * interleaved with field numbers, but we must write them in canonical
      * (sorted by field number) order.  ExtensionWriter helps us write
@@ -400,121 +448,139 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite
      */
     @Override
     protected boolean parseUnknownField(
-        final CodedInputStream input,
-        final ExtensionRegistryLite extensionRegistry,
-        final int tag) throws IOException {
-      final int wireType = WireFormat.getTagWireType(tag);
-      final int fieldNumber = WireFormat.getTagFieldNumber(tag);
-
-      final GeneratedExtension<MessageType, ?> extension =
-        extensionRegistry.findLiteExtensionByNumber(
-            getDefaultInstanceForType(), fieldNumber);
-
-      boolean unknown = false;
-      boolean packed = false;
-      if (extension == null) {
-        unknown = true;  // Unknown field.
-      } else if (wireType == FieldSet.getWireFormatForFieldType(
-                   extension.descriptor.getLiteType(),
-                   false  /* isPacked */)) {
-        packed = false;  // Normal, unpacked value.
-      } else if (extension.descriptor.isRepeated &&
-                 extension.descriptor.type.isPackable() &&
-                 wireType == FieldSet.getWireFormatForFieldType(
-                   extension.descriptor.getLiteType(),
-                   true  /* isPacked */)) {
-        packed = true;  // Packed value.
-      } else {
-        unknown = true;  // Wrong wire type.
-      }
+        CodedInputStream input,
+        ExtensionRegistryLite extensionRegistry,
+        int tag) throws IOException {
+      ensureExtensionsIsMutable();
+      return GeneratedMessageLite.parseUnknownField(
+          extensions,
+          getDefaultInstanceForType(),
+          input,
+          extensionRegistry,
+          tag);
+    }
 
-      if (unknown) {  // Unknown field or wrong wire type.  Skip.
-        return input.skipField(tag);
-      }
+    protected final void mergeExtensionFields(final MessageType other) {
+      ensureExtensionsIsMutable();
+      extensions.mergeFrom(((ExtendableMessage) other).extensions);
+    }
+  }
 
-      if (packed) {
-        final int length = input.readRawVarint32();
-        final int limit = input.pushLimit(length);
-        if (extension.descriptor.getLiteType() == WireFormat.FieldType.ENUM) {
-          while (input.getBytesUntilLimit() > 0) {
-            final int rawValue = input.readEnum();
-            final Object value =
-                extension.descriptor.getEnumType().findValueByNumber(rawValue);
-            if (value == null) {
-              // If the number isn't recognized as a valid value for this
-              // enum, drop it (don't even add it to unknownFields).
-              return true;
-            }
-            ensureExtensionsIsMutable();
-            extensions.addRepeatedField(extension.descriptor, value);
-          }
-        } else {
-          while (input.getBytesUntilLimit() > 0) {
-            final Object value =
-              FieldSet.readPrimitiveField(input,
-                                          extension.descriptor.getLiteType());
-            ensureExtensionsIsMutable();
-            extensions.addRepeatedField(extension.descriptor, value);
+  // -----------------------------------------------------------------
+
+  /**
+   * Parse an unknown field or an extension.
+   * @return {@code true} unless the tag is an end-group tag.
+   */
+  private static <MessageType extends MessageLite>
+      boolean parseUnknownField(
+          FieldSet<ExtensionDescriptor> extensions,
+          MessageType defaultInstance,
+          CodedInputStream input,
+          ExtensionRegistryLite extensionRegistry,
+          int tag) throws IOException {
+    int wireType = WireFormat.getTagWireType(tag);
+    int fieldNumber = WireFormat.getTagFieldNumber(tag);
+
+    GeneratedExtension<MessageType, ?> extension =
+      extensionRegistry.findLiteExtensionByNumber(
+          defaultInstance, fieldNumber);
+
+    boolean unknown = false;
+    boolean packed = false;
+    if (extension == null) {
+      unknown = true;  // Unknown field.
+    } else if (wireType == FieldSet.getWireFormatForFieldType(
+                 extension.descriptor.getLiteType(),
+                 false  /* isPacked */)) {
+      packed = false;  // Normal, unpacked value.
+    } else if (extension.descriptor.isRepeated &&
+               extension.descriptor.type.isPackable() &&
+               wireType == FieldSet.getWireFormatForFieldType(
+                 extension.descriptor.getLiteType(),
+                 true  /* isPacked */)) {
+      packed = true;  // Packed value.
+    } else {
+      unknown = true;  // Wrong wire type.
+    }
+
+    if (unknown) {  // Unknown field or wrong wire type.  Skip.
+      return input.skipField(tag);
+    }
+
+    if (packed) {
+      int length = input.readRawVarint32();
+      int limit = input.pushLimit(length);
+      if (extension.descriptor.getLiteType() == WireFormat.FieldType.ENUM) {
+        while (input.getBytesUntilLimit() > 0) {
+          int rawValue = input.readEnum();
+          Object value =
+              extension.descriptor.getEnumType().findValueByNumber(rawValue);
+          if (value == null) {
+            // If the number isn't recognized as a valid value for this
+            // enum, drop it (don't even add it to unknownFields).
+            return true;
           }
+          extensions.addRepeatedField(extension.descriptor, value);
         }
-        input.popLimit(limit);
       } else {
-        final Object value;
-        switch (extension.descriptor.getLiteJavaType()) {
-          case MESSAGE: {
-            MessageLite.Builder subBuilder = null;
-            if (!extension.descriptor.isRepeated()) {
-              MessageLite existingValue =
-                  (MessageLite) extensions.getField(extension.descriptor);
-              if (existingValue != null) {
-                subBuilder = existingValue.toBuilder();
-              }
-            }
-            if (subBuilder == null) {
-              subBuilder = extension.messageDefaultInstance.newBuilderForType();
-            }
-            if (extension.descriptor.getLiteType() ==
-                WireFormat.FieldType.GROUP) {
-              input.readGroup(extension.getNumber(),
-                              subBuilder, extensionRegistry);
-            } else {
-              input.readMessage(subBuilder, extensionRegistry);
+        while (input.getBytesUntilLimit() > 0) {
+          Object value =
+            FieldSet.readPrimitiveField(input,
+                                        extension.descriptor.getLiteType());
+          extensions.addRepeatedField(extension.descriptor, value);
+        }
+      }
+      input.popLimit(limit);
+    } else {
+      Object value;
+      switch (extension.descriptor.getLiteJavaType()) {
+        case MESSAGE: {
+          MessageLite.Builder subBuilder = null;
+          if (!extension.descriptor.isRepeated()) {
+            MessageLite existingValue =
+                (MessageLite) extensions.getField(extension.descriptor);
+            if (existingValue != null) {
+              subBuilder = existingValue.toBuilder();
             }
-            value = subBuilder.build();
-            break;
           }
-          case ENUM:
-            final int rawValue = input.readEnum();
-            value = extension.descriptor.getEnumType()
-                             .findValueByNumber(rawValue);
-            // If the number isn't recognized as a valid value for this enum,
-            // drop it.
-            if (value == null) {
-              return true;
-            }
-            break;
-          default:
-            value = FieldSet.readPrimitiveField(input,
-                extension.descriptor.getLiteType());
-            break;
-        }
-
-        if (extension.descriptor.isRepeated()) {
-          ensureExtensionsIsMutable();
-          extensions.addRepeatedField(extension.descriptor, value);
-        } else {
-          ensureExtensionsIsMutable();
-          extensions.setField(extension.descriptor, value);
+          if (subBuilder == null) {
+            subBuilder = extension.messageDefaultInstance.newBuilderForType();
+          }
+          if (extension.descriptor.getLiteType() ==
+              WireFormat.FieldType.GROUP) {
+            input.readGroup(extension.getNumber(),
+                            subBuilder, extensionRegistry);
+          } else {
+            input.readMessage(subBuilder, extensionRegistry);
+          }
+          value = subBuilder.build();
+          break;
         }
+        case ENUM:
+          int rawValue = input.readEnum();
+          value = extension.descriptor.getEnumType()
+                           .findValueByNumber(rawValue);
+          // If the number isn't recognized as a valid value for this enum,
+          // drop it.
+          if (value == null) {
+            return true;
+          }
+          break;
+        default:
+          value = FieldSet.readPrimitiveField(input,
+              extension.descriptor.getLiteType());
+          break;
       }
 
-      return true;
+      if (extension.descriptor.isRepeated()) {
+        extensions.addRepeatedField(extension.descriptor, value);
+      } else {
+        extensions.setField(extension.descriptor, value);
+      }
     }
 
-    protected final void mergeExtensionFields(final MessageType other) {
-      ensureExtensionsIsMutable();
-      extensions.mergeFrom(((ExtendableMessage) other).extensions);
-    }
+    return true;
   }
 
   // -----------------------------------------------------------------
@@ -722,7 +788,7 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite
   /**
    * Replaces this object in the output stream with a serialized form.
    * Part of Java's serialization magic.  Generated sub-classes must override
-   * this method by calling <code>return super.writeReplace();</code>
+   * this method by calling {@code return super.writeReplace();}
    * @return a SerializedForm of this message
    */
   protected Object writeReplace() throws ObjectStreamException {
diff --git a/java/src/main/java/com/google/protobuf/Internal.java b/java/src/main/java/com/google/protobuf/Internal.java
index 05eab57a..81af2583 100644
--- a/java/src/main/java/com/google/protobuf/Internal.java
+++ b/java/src/main/java/com/google/protobuf/Internal.java
@@ -103,85 +103,32 @@ public class Internal {
    * Helper called by generated code to determine if a byte array is a valid
    * UTF-8 encoded string such that the original bytes can be converted to
    * a String object and then back to a byte array round tripping the bytes
-   * without loss.
-   * <p>
-   * This is inspired by UTF_8.java in sun.nio.cs.
+   * without loss.  More precisely, returns {@code true} whenever:
+   * <pre>   {@code
+   * Arrays.equals(byteString.toByteArray(),
+   *     new String(byteString.toByteArray(), "UTF-8").getBytes("UTF-8"))
+   * }</pre>
+   *
+   * <p>This method rejects "overlong" byte sequences, as well as
+   * 3-byte sequences that would map to a surrogate character, in
+   * accordance with the restricted definition of UTF-8 introduced in
+   * Unicode 3.1.  Note that the UTF-8 decoder included in Oracle's
+   * JDK has been modified to also reject "overlong" byte sequences,
+   * but currently (2011) still accepts 3-byte surrogate character
+   * byte sequences.
+   *
+   * <p>See the Unicode Standard,<br>
+   * Table 3-6. <em>UTF-8 Bit Distribution</em>,<br>
+   * Table 3-7. <em>Well Formed UTF-8 Byte Sequences</em>.
+   *
+   * <p>As of 2011-02, this method simply returns the result of {@link
+   * ByteString#isValidUtf8()}.  Calling that method directly is preferred.
    *
    * @param byteString the string to check
    * @return whether the byte array is round trippable
    */
   public static boolean isValidUtf8(ByteString byteString) {
-    int index = 0;
-    int size = byteString.size();
-    // To avoid the masking, we could change this to use bytes;
-    // Then X > 0xC2 gets turned into X < -0xC2; X < 0x80
-    // gets turned into X >= 0, etc.
-
-    while (index < size) {
-      int byte1 = byteString.byteAt(index++) & 0xFF;
-      if (byte1 < 0x80) {
-        // fast loop for single bytes
-        continue;
-
-        // we know from this point on that we have 2-4 byte forms
-      } else if (byte1 < 0xC2 || byte1 > 0xF4) {
-        // catch illegal first bytes: < C2 or > F4
-        return false;
-      }
-      if (index >= size) {
-        // fail if we run out of bytes
-        return false;
-      }
-      int byte2 = byteString.byteAt(index++) & 0xFF;
-      if (byte2 < 0x80 || byte2 > 0xBF) {
-        // general trail-byte test
-        return false;
-      }
-      if (byte1 <= 0xDF) {
-        // two-byte form; general trail-byte test is sufficient
-        continue;
-      }
-
-      // we know from this point on that we have 3 or 4 byte forms
-      if (index >= size) {
-        // fail if we run out of bytes
-        return false;
-      }
-      int byte3 = byteString.byteAt(index++) & 0xFF;
-      if (byte3 < 0x80 || byte3 > 0xBF) {
-        // general trail-byte test
-        return false;
-      }
-      if (byte1 <= 0xEF) {
-        // three-byte form. Vastly more frequent than four-byte forms
-        // The following has an extra test, but not worth restructuring
-        if (byte1 == 0xE0 && byte2 < 0xA0 ||
-            byte1 == 0xED && byte2 > 0x9F) {
-          // check special cases of byte2
-          return false;
-        }
-
-      } else {
-        // four-byte form
-
-        if (index >= size) {
-          // fail if we run out of bytes
-          return false;
-        }
-        int byte4 = byteString.byteAt(index++) & 0xFF;
-        if (byte4 < 0x80 || byte4 > 0xBF) {
-          // general trail-byte test
-          return false;
-        }
-        // The following has an extra test, but not worth restructuring
-        if (byte1 == 0xF0 && byte2 < 0x90 ||
-            byte1 == 0xF4 && byte2 > 0x8F) {
-          // check special cases of byte2
-          return false;
-        }
-      }
-    }
-    return true;
+    return byteString.isValidUtf8();
   }
 
   /**
diff --git a/java/src/main/java/com/google/protobuf/InvalidProtocolBufferException.java b/java/src/main/java/com/google/protobuf/InvalidProtocolBufferException.java
index 90f7ffbc..72d7ff7d 100644
--- a/java/src/main/java/com/google/protobuf/InvalidProtocolBufferException.java
+++ b/java/src/main/java/com/google/protobuf/InvalidProtocolBufferException.java
@@ -40,11 +40,32 @@ import java.io.IOException;
  */
 public class InvalidProtocolBufferException extends IOException {
   private static final long serialVersionUID = -1616151763072450476L;
+  private MessageLite unfinishedMessage = null;
 
   public InvalidProtocolBufferException(final String description) {
     super(description);
   }
 
+  /**
+   * Attaches an unfinished message to the exception to support best-effort
+   * parsing in the {@code Parser} interface.
+   *
+   * @return this
+   */
+  public InvalidProtocolBufferException setUnfinishedMessage(
+      MessageLite unfinishedMessage) {
+    this.unfinishedMessage = unfinishedMessage;
+    return this;
+  }
+
+  /**
+   * Returns the unfinished message attached to the exception, or null if
+   * no message is attached.
+   */
+  public MessageLite getUnfinishedMessage() {
+    return unfinishedMessage;
+  }
+
   static InvalidProtocolBufferException truncatedMessage() {
     return new InvalidProtocolBufferException(
       "While parsing a protocol message, the input ended unexpectedly " +
diff --git a/java/src/main/java/com/google/protobuf/LazyField.java b/java/src/main/java/com/google/protobuf/LazyField.java
new file mode 100644
index 00000000..df9425eb
--- /dev/null
+++ b/java/src/main/java/com/google/protobuf/LazyField.java
@@ -0,0 +1,216 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Map.Entry;
+
+/**
+ * LazyField encapsulates the logic of lazily parsing message fields. It stores
+ * the message in a ByteString initially and then parses it on demand.
+ *
+ * LazyField is thread-compatible, i.e. concurrent reads are safe; however,
+ * synchronization is needed when reads and writes are mixed.
+ *
+ * Currently LazyField is only used to lazily load MessageSet.
+ * TODO(xiangl): Use LazyField to lazily load all messages.
+ *
+ * @author xiangl@google.com (Xiang Li)
+ */
+class LazyField {
+
+  final private MessageLite defaultInstance;
+  final private ExtensionRegistryLite extensionRegistry;
+
+  // Mutable because it is initialized lazily.
+  private ByteString bytes;
+  private volatile MessageLite value;
+  private volatile boolean isDirty = false;
+
+  public LazyField(MessageLite defaultInstance,
+      ExtensionRegistryLite extensionRegistry, ByteString bytes) {
+    this.defaultInstance = defaultInstance;
+    this.extensionRegistry = extensionRegistry;
+    this.bytes = bytes;
+  }
+
+  public MessageLite getValue() {
+    ensureInitialized();
+    return value;
+  }
+
+  /**
+   * LazyField is not thread-safe for write access. Synchronization is needed
+   * when reads and writes are mixed.
+   */
+  public MessageLite setValue(MessageLite value) {
+    MessageLite originalValue = this.value;
+    this.value = value;
+    bytes = null;
+    isDirty = true;
+    return originalValue;
+  }
+
+  /**
+   * Because an optional field can be duplicated at the end of the serialized
+   * bytes, the serialized size may change after the LazyField is parsed. Be
+   * careful when using this method.
+   */
+  public int getSerializedSize() {
+    if (isDirty) {
+      return value.getSerializedSize();
+    }
+    return bytes.size();
+  }
+
+  public ByteString toByteString() {
+    if (!isDirty) {
+      return bytes;
+    }
+    synchronized (this) {
+      if (!isDirty) {
+        return bytes;
+      }
+      bytes = value.toByteString();
+      isDirty = false;
+      return bytes;
+    }
+  }
+
+  @Override
+  public int hashCode() {
+    ensureInitialized();
+    return value.hashCode();
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    ensureInitialized();
+    return value.equals(obj);
+  }
+
+  @Override
+  public String toString() {
+    ensureInitialized();
+    return value.toString();
+  }
+
+  private void ensureInitialized() {
+    if (value != null) {
+      return;
+    }
+    synchronized (this) {
+      if (value != null) {
+        return;
+      }
+      try {
+        if (bytes != null) {
+          value = defaultInstance.getParserForType()
+              .parseFrom(bytes, extensionRegistry);
+        }
+      } catch (IOException e) {
+        // TODO(xiangl): Refactor the API to support exceptions thrown while
+        // lazily loading messages.
+      }
+    }
+  }
+
+  // ====================================================
+
+  /**
+   * LazyEntry and LazyIterator are used to encapsulate the LazyField when
+   * users iterate over all fields of a FieldSet.
+   */
+  static class LazyEntry<K> implements Entry<K, Object> {
+    private Entry<K, LazyField> entry;
+
+    private LazyEntry(Entry<K, LazyField> entry) {
+      this.entry = entry;
+    }
+
+    @Override
+    public K getKey() {
+      return entry.getKey();
+    }
+
+    @Override
+    public Object getValue() {
+      LazyField field = entry.getValue();
+      if (field == null) {
+        return null;
+      }
+      return field.getValue();
+    }
+
+    public LazyField getField() {
+      return entry.getValue();
+    }
+
+    @Override
+    public Object setValue(Object value) {
+      if (!(value instanceof MessageLite)) {
+        throw new IllegalArgumentException(
+            "LazyField now only used for MessageSet, "
+            + "and the value of MessageSet must be an instance of MessageLite");
+      }
+      return entry.getValue().setValue((MessageLite) value);
+    }
+  }
+
+  static class LazyIterator<K> implements Iterator<Entry<K, Object>> {
+    private Iterator<Entry<K, Object>> iterator;
+
+    public LazyIterator(Iterator<Entry<K, Object>> iterator) {
+      this.iterator = iterator;
+    }
+
+    @Override
+    public boolean hasNext() {
+      return iterator.hasNext();
+    }
+
+    @SuppressWarnings("unchecked")
+    @Override
+    public Entry<K, Object> next() {
+      Entry<K, ?> entry = iterator.next();
+      if (entry.getValue() instanceof LazyField) {
+        return new LazyEntry<K>((Entry<K, LazyField>) entry);
+      }
+      return (Entry<K, Object>) entry;
+    }
+
+    @Override
+    public void remove() {
+      iterator.remove();
+    }
+  }
+}
diff --git a/java/src/main/java/com/google/protobuf/LazyStringArrayList.java b/java/src/main/java/com/google/protobuf/LazyStringArrayList.java
index 1683a640..75c6a4b7 100644
--- a/java/src/main/java/com/google/protobuf/LazyStringArrayList.java
+++ b/java/src/main/java/com/google/protobuf/LazyStringArrayList.java
@@ -33,8 +33,9 @@ package com.google.protobuf;
 import java.util.List;
 import java.util.AbstractList;
 import java.util.ArrayList;
-import java.util.RandomAccess;
 import java.util.Collection;
+import java.util.Collections;
+import java.util.RandomAccess;
 
 /**
  * An implementation of {@link LazyStringList} that wraps an ArrayList. Each
@@ -72,6 +73,11 @@ public class LazyStringArrayList extends AbstractList<String>
     list = new ArrayList<Object>();
   }
 
+  public LazyStringArrayList(LazyStringList from) {
+    // Copy the raw elements directly; avoids calling overridable addAll() here.
+    list = new ArrayList<Object>(from.getUnderlyingElements());
+  }
+
   public LazyStringArrayList(List<String> from) {
     list = new ArrayList<Object>(from);
   }
@@ -84,7 +90,7 @@ public class LazyStringArrayList extends AbstractList<String>
     } else {
       ByteString bs = (ByteString) o;
       String s = bs.toStringUtf8();
-      if (Internal.isValidUtf8(bs)) {
+      if (bs.isValidUtf8()) {
         list.set(index, s);
       }
       return s;
@@ -109,8 +115,21 @@ public class LazyStringArrayList extends AbstractList<String>
   }
 
   @Override
+  public boolean addAll(Collection<? extends String> c) {
+    // The default implementation of AbstractCollection.addAll(Collection)
+    // delegates to add(Object). This implementation instead delegates to
+    // addAll(int, Collection), which makes a special case for Collections
+    // which are instances of LazyStringList.
+    return addAll(size(), c);
+  }
+
+  @Override
   public boolean addAll(int index, Collection<? extends String> c) {
-    boolean ret = list.addAll(index, c);
+    // When copying from another LazyStringList, directly copy the underlying
+    // elements rather than forcing each element to be decoded to a String.
+    Collection<?> collection = c instanceof LazyStringList
+        ? ((LazyStringList) c).getUnderlyingElements() : c;
+    boolean ret = list.addAll(index, collection);
     modCount++;
     return ret;
   }
@@ -152,4 +171,9 @@ public class LazyStringArrayList extends AbstractList<String>
       return ((ByteString) o).toStringUtf8();
     }
   }
+
+  @Override
+  public List<?> getUnderlyingElements() {
+    return Collections.unmodifiableList(list);
+  }
 }
diff --git a/java/src/main/java/com/google/protobuf/LazyStringList.java b/java/src/main/java/com/google/protobuf/LazyStringList.java
index 97139ca6..630932fe 100644
--- a/java/src/main/java/com/google/protobuf/LazyStringList.java
+++ b/java/src/main/java/com/google/protobuf/LazyStringList.java
@@ -33,7 +33,7 @@ package com.google.protobuf;
 import java.util.List;
 
 /**
- * An interface extending List&lt;String&gt; that also provides access to the
+ * An interface extending {@code List<String>} that also provides access to the
  * items of the list as UTF8-encoded ByteString objects. This is used by the
  * protocol buffer implementation to support lazily converting bytes parsed
  * over the wire to String objects until needed and also increases the
@@ -41,9 +41,9 @@ import java.util.List;
  * ByteString is already cached.
  * <p>
  * This only adds additional methods that are required for the use in the
- * protocol buffer code in order to be able successfuly round trip byte arrays
+ * protocol buffer code in order to successfully round trip byte arrays
  * through parsing and serialization without conversion to strings. It's not
- * attempting to support the functionality of say List&ltByteString&gt, hence
+ * attempting to support the functionality of say {@code List<ByteString>}, hence
  * why only these two very specific methods are added.
  *
  * @author jonp@google.com (Jon Perlow)
@@ -56,7 +56,7 @@ public interface LazyStringList extends List<String> {
    * @param index index of the element to return
    * @return the element at the specified position in this list
    * @throws IndexOutOfBoundsException if the index is out of range
-   *         (<tt>index &lt; 0 || index &gt;= size()</tt>)
+   *         ({@code index < 0 || index >= size()})
    */
   ByteString getByteString(int index);
 
@@ -69,4 +69,13 @@ public interface LazyStringList extends List<String> {
    *         is not supported by this list
    */
   void add(ByteString element);
+
+  /**
+   * Returns an unmodifiable List of the underlying elements, each of
+   * which is either a {@code String} or its equivalent UTF-8 encoded
+   * {@code ByteString}. It is an error for the caller to modify the returned
+   * List, and attempting to do so will result in an
+   * {@link UnsupportedOperationException}.
+   */
+  List<?> getUnderlyingElements();
 }
diff --git a/java/src/main/java/com/google/protobuf/LiteralByteString.java b/java/src/main/java/com/google/protobuf/LiteralByteString.java
new file mode 100644
index 00000000..93c53dce
--- /dev/null
+++ b/java/src/main/java/com/google/protobuf/LiteralByteString.java
@@ -0,0 +1,349 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.NoSuchElementException;
+
+/**
+ * This class implements a {@link com.google.protobuf.ByteString} backed by a
+ * single array of bytes, contiguous in memory. It supports substring by
+ * pointing to only a sub-range of the underlying byte array, meaning that a
+ * substring will reference the full byte-array of the string it's made from,
+ * exactly as with {@link String}.
+ *
+ * @author carlanton@google.com (Carl Haverl)
+ */
+class LiteralByteString extends ByteString {
+
+  protected final byte[] bytes;
+
+  /**
+   * Creates a {@code LiteralByteString} backed by the given array, without
+   * copying.
+   *
+   * @param bytes array to wrap
+   */
+  LiteralByteString(byte[] bytes) {
+    this.bytes = bytes;
+  }
+
+  @Override
+  public byte byteAt(int index) {
+    // Unlike most methods in this class, this one is a direct implementation
+    // ignoring the potential offset because we need to do range-checking in the
+    // substring case anyway.
+    return bytes[index];
+  }
+
+  @Override
+  public int size() {
+    return bytes.length;
+  }
+
+  // =================================================================
+  // ByteString -> substring
+
+  @Override
+  public ByteString substring(int beginIndex, int endIndex) {
+    if (beginIndex < 0) {
+      throw new IndexOutOfBoundsException(
+          "Beginning index: " + beginIndex + " < 0");
+    }
+    if (endIndex > size()) {
+      throw new IndexOutOfBoundsException("End index: " + endIndex + " > " +
+          size());
+    }
+    int substringLength = endIndex - beginIndex;
+    if (substringLength < 0) {
+      throw new IndexOutOfBoundsException(
+          "Beginning index larger than ending index: " + beginIndex + ", "
+              + endIndex);
+    }
+
+    ByteString result;
+    if (substringLength == 0) {
+      result = ByteString.EMPTY;
+    } else {
+      result = new BoundedByteString(bytes, getOffsetIntoBytes() + beginIndex,
+          substringLength);
+    }
+    return result;
+  }
+
+  // =================================================================
+  // ByteString -> byte[]
+
+  @Override
+  protected void copyToInternal(byte[] target, int sourceOffset,
+      int targetOffset, int numberToCopy) {
+    // Optimized form, not for subclasses, since we don't call
+    // getOffsetIntoBytes() or check the 'numberToCopy' parameter.
+    System.arraycopy(bytes, sourceOffset, target, targetOffset, numberToCopy);
+  }
+
+  @Override
+  public void copyTo(ByteBuffer target) {
+    target.put(bytes, getOffsetIntoBytes(), size());  // Copies bytes
+  }
+
+  @Override
+  public ByteBuffer asReadOnlyByteBuffer() {
+    ByteBuffer byteBuffer =
+        ByteBuffer.wrap(bytes, getOffsetIntoBytes(), size());
+    return byteBuffer.asReadOnlyBuffer();
+  }
+
+  @Override
+  public List<ByteBuffer> asReadOnlyByteBufferList() {
+    // Return the ByteBuffer generated by asReadOnlyByteBuffer() as a singleton
+    List<ByteBuffer> result = new ArrayList<ByteBuffer>(1);
+    result.add(asReadOnlyByteBuffer());
+    return result;
+  }
+
+  @Override
+  public void writeTo(OutputStream outputStream) throws IOException {
+    outputStream.write(toByteArray());
+  }
+
+  @Override
+  public String toString(String charsetName)
+      throws UnsupportedEncodingException {
+    return new String(bytes, getOffsetIntoBytes(), size(), charsetName);
+  }
+
+  // =================================================================
+  // UTF-8 decoding
+
+  @Override
+  public boolean isValidUtf8() {
+    int offset = getOffsetIntoBytes();
+    return Utf8.isValidUtf8(bytes, offset, offset + size());
+  }
+
+  @Override
+  protected int partialIsValidUtf8(int state, int offset, int length) {
+    int index = getOffsetIntoBytes() + offset;
+    return Utf8.partialIsValidUtf8(state, bytes, index, index + length);
+  }
+
+  // =================================================================
+  // equals() and hashCode()
+
+  @Override
+  public boolean equals(Object other) {
+    if (other == this) {
+      return true;
+    }
+    if (!(other instanceof ByteString)) {
+      return false;
+    }
+
+    if (size() != ((ByteString) other).size()) {
+      return false;
+    }
+    if (size() == 0) {
+      return true;
+    }
+
+    if (other instanceof LiteralByteString) {
+      return equalsRange((LiteralByteString) other, 0, size());
+    } else if (other instanceof RopeByteString) {
+      return other.equals(this);
+    } else {
+      throw new IllegalArgumentException(
+          "Has a new type of ByteString been created? Found "
+              + other.getClass());
+    }
+  }
+
+  /**
+   * Check equality of the substring of given length of this object starting at
+   * zero with another {@code LiteralByteString} substring starting at offset.
+   *
+   * @param other  what to compare a substring in
+   * @param offset offset into other
+   * @param length number of bytes to compare
+   * @return true for equality of substrings, else false.
+   */
+  boolean equalsRange(LiteralByteString other, int offset, int length) {
+    if (length > other.size()) {
+      throw new IllegalArgumentException(
+          "Length too large: " + length + " > " + other.size());
+    }
+    if (offset + length > other.size()) {
+      throw new IllegalArgumentException(
+          "Ran off end of other: " + offset + ", " + length + ", " +
+              other.size());
+    }
+
+    byte[] thisBytes = bytes;
+    byte[] otherBytes = other.bytes;
+    int thisLimit = getOffsetIntoBytes() + length;
+    for (int thisIndex = getOffsetIntoBytes(), otherIndex =
+        other.getOffsetIntoBytes() + offset;
+        (thisIndex < thisLimit); ++thisIndex, ++otherIndex) {
+      if (thisBytes[thisIndex] != otherBytes[otherIndex]) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Cached hash value.  Intentionally accessed via a data race, which
+   * is safe because of the Java Memory Model's "no out-of-thin-air values"
+   * guarantees for ints.
+   */
+  private int hash = 0;
+
+  /**
+   * Compute the hashCode using the traditional algorithm from {@link
+   * ByteString}.
+   *
+   * @return hashCode value
+   */
+  @Override
+  public int hashCode() {
+    int h = hash;
+
+    if (h == 0) {
+      int size = size();
+      h = partialHash(size, 0, size);
+      if (h == 0) {
+        h = 1;
+      }
+      hash = h;
+    }
+    return h;
+  }
+
+  @Override
+  protected int peekCachedHashCode() {
+    return hash;
+  }
+
+  @Override
+  protected int partialHash(int h, int offset, int length) {
+    byte[] thisBytes = bytes;
+    for (int i = getOffsetIntoBytes() + offset, limit = i + length; i < limit;
+        i++) {
+      h = h * 31 + thisBytes[i];
+    }
+    return h;
+  }
+
+  // =================================================================
+  // Input stream
+
+  @Override
+  public InputStream newInput() {
+    return new ByteArrayInputStream(bytes, getOffsetIntoBytes(),
+        size());  // No copy
+  }
+
+  @Override
+  public CodedInputStream newCodedInput() {
+    // We trust CodedInputStream not to modify the bytes, or to give anyone
+    // else access to them.
+    return CodedInputStream
+        .newInstance(bytes, getOffsetIntoBytes(), size());  // No copy
+  }
+
+  // =================================================================
+  // ByteIterator
+
+  @Override
+  public ByteIterator iterator() {
+    return new LiteralByteIterator();
+  }
+
+  private class LiteralByteIterator implements ByteIterator {
+    private int position;
+    private final int limit;
+
+    private LiteralByteIterator() {
+      position = 0;
+      limit = size();
+    }
+
+    public boolean hasNext() {
+      return (position < limit);
+    }
+
+    public Byte next() {
+      // Boxing calls Byte.valueOf(byte), which does not instantiate.
+      return nextByte();
+    }
+
+    public byte nextByte() {
+      if (position >= limit) {
+        throw new NoSuchElementException(
+            "position " + position + " >= limit " + limit);
+      }
+      return bytes[position++];
+    }
+
+    public void remove() {
+      throw new UnsupportedOperationException();
+    }
+  }
+
+  // =================================================================
+  // Internal methods
+
+  @Override
+  protected int getTreeDepth() {
+    return 0;
+  }
+
+  @Override
+  protected boolean isBalanced() {
+    return true;
+  }
+
+  /**
+   * Offset into {@code bytes[]} to use, non-zero for substrings.
+   *
+   * @return always 0 for this class
+   */
+  protected int getOffsetIntoBytes() {
+    return 0;
+  }
+}
diff --git a/java/src/main/java/com/google/protobuf/Message.java b/java/src/main/java/com/google/protobuf/Message.java
index 67c4148e..2b881413 100644
--- a/java/src/main/java/com/google/protobuf/Message.java
+++ b/java/src/main/java/com/google/protobuf/Message.java
@@ -50,25 +50,28 @@ import java.util.Map;
  */
 public interface Message extends MessageLite, MessageOrBuilder {
 
+  // (From MessageLite, re-declared here only for return type covariance.)
+  Parser<? extends Message> getParserForType();
+
   // -----------------------------------------------------------------
   // Comparison and hashing
 
   /**
    * Compares the specified object with this message for equality.  Returns
-   * <tt>true</tt> if the given object is a message of the same type (as
+   * {@code true} if the given object is a message of the same type (as
    * defined by {@code getDescriptorForType()}) and has identical values for
    * all of its fields.  Subclasses must implement this; inheriting
    * {@code Object.equals()} is incorrect.
    *
    * @param other object to be compared for equality with this message
-   * @return <tt>true</tt> if the specified object is equal to this message
+   * @return {@code true} if the specified object is equal to this message
    */
   @Override
   boolean equals(Object other);
 
   /**
    * Returns the hash code value for this message.  The hash code of a message
-   * should mix the message's type (object identity of the decsriptor) with its
+   * should mix the message's type (object identity of the descriptor) with its
    * contents (known and unknown field values).  Subclasses must implement this;
    * inheriting {@code Object.hashCode()} is incorrect.
    *
@@ -83,7 +86,8 @@ public interface Message extends MessageLite, MessageOrBuilder {
 
   /**
    * Converts the message to a string in protocol buffer text format. This is
-   * just a trivial wrapper around {@link TextFormat#printToString(Message)}.
+   * just a trivial wrapper around {@link
+   * TextFormat#printToString(MessageOrBuilder)}.
    */
   @Override
   String toString();
@@ -145,6 +149,24 @@ public interface Message extends MessageLite, MessageOrBuilder {
     Builder newBuilderForField(Descriptors.FieldDescriptor field);
 
     /**
+     * Get a nested builder instance for the given field.
+     * <p>
+     * Normally, we hold a reference to the immutable message object for the
+     * message type field. Some implementations (the generated message
+     * builders), however, can also hold a reference to the builder object (a
+     * nested builder) for the field.
+     * <p>
+     * If the field is already backed by a nested builder, the nested builder
+     * will be returned. Otherwise, a new field builder will be created and
+     * returned. The original message field (if it exists) will be merged into
+     * the field builder, which will then be nested into its parent builder.
+     * <p>
+     * NOTE: implementations that do not support nested builders will throw
+     * {@code UnsupportedOperationException}.
+     */
+    Builder getFieldBuilder(Descriptors.FieldDescriptor field);
+
+    /**
      * Sets a field to the given value.  The value must be of the correct type
      * for this field, i.e. the same type that
      * {@link Message#getField(Descriptors.FieldDescriptor)} would return.
diff --git a/java/src/main/java/com/google/protobuf/MessageLite.java b/java/src/main/java/com/google/protobuf/MessageLite.java
index 31b8256e..e5b9a47b 100644
--- a/java/src/main/java/com/google/protobuf/MessageLite.java
+++ b/java/src/main/java/com/google/protobuf/MessageLite.java
@@ -79,6 +79,12 @@ public interface MessageLite extends MessageLiteOrBuilder {
    */
   int getSerializedSize();
 
+
+  /**
+   * Gets the parser for a message of the same type as this message.
+   */
+  Parser<? extends MessageLite> getParserForType();
+
   // -----------------------------------------------------------------
   // Convenience methods.
 
@@ -144,11 +150,8 @@ public interface MessageLite extends MessageLiteOrBuilder {
     Builder clear();
 
     /**
-     * Construct the final message.  Once this is called, the Builder is no
-     * longer valid, and calling any other method will result in undefined
-     * behavior and may throw a NullPointerException.  If you need to continue
-     * working with the builder after calling {@code build()}, {@code clone()}
-     * it first.
+     * Constructs the message based on the state of the Builder. Subsequent
+     * changes to the Builder will not affect the returned message.
      * @throws UninitializedMessageException The message is missing one or more
      *         required fields (i.e. {@link #isInitialized()} returns false).
      *         Use {@link #buildPartial()} to bypass this check.
@@ -158,11 +161,7 @@ public interface MessageLite extends MessageLiteOrBuilder {
     /**
      * Like {@link #build()}, but does not throw an exception if the message
      * is missing required fields.  Instead, a partial message is returned.
-     * Once this is called, the Builder is no longer valid, and calling any
-     * will result in undefined behavior and may throw a NullPointerException.
-     *
-     * If you need to continue working with the builder after calling
-     * {@code buildPartial()}, {@code clone()} it first.
+     * Subsequent changes to the Builder will not affect the returned message.
      */
     MessageLite buildPartial();
 
@@ -174,7 +173,7 @@ public interface MessageLite extends MessageLiteOrBuilder {
 
     /**
      * Parses a message of this type from the input and merges it with this
-     * message, as if using {@link Builder#mergeFrom(MessageLite)}.
+     * message.
      *
      * <p>Warning:  This does not verify that all required fields are present in
      * the input message.  If you call {@link #build()} without setting all
@@ -184,11 +183,6 @@ public interface MessageLite extends MessageLiteOrBuilder {
      * <ul>
      *   <li>Call {@link #isInitialized()} to verify that all required fields
      *       are set before building.
-     *   <li>Parse the message separately using one of the static
-     *       {@code parseFrom} methods, then use {@link #mergeFrom(MessageLite)}
-     *       to merge it with this one.  {@code parseFrom} will throw an
-     *       {@link InvalidProtocolBufferException} (an {@code IOException})
-     *       if some required fields are missing.
      *   <li>Use {@code buildPartial()} to build, which ignores missing
      *       required fields.
      * </ul>
@@ -225,7 +219,7 @@ public interface MessageLite extends MessageLiteOrBuilder {
     /**
      * Parse {@code data} as a message of this type and merge it with the
      * message being built.  This is just a small wrapper around
-     * {@link #mergeFrom(CodedInputStream,ExtensionRegistry)}.
+     * {@link #mergeFrom(CodedInputStream,ExtensionRegistryLite)}.
      *
      * @return this
      */
@@ -255,7 +249,7 @@ public interface MessageLite extends MessageLiteOrBuilder {
     /**
      * Parse {@code data} as a message of this type and merge it with the
      * message being built.  This is just a small wrapper around
-     * {@link #mergeFrom(CodedInputStream,ExtensionRegistry)}.
+     * {@link #mergeFrom(CodedInputStream,ExtensionRegistryLite)}.
      *
      * @return this
      */
@@ -266,7 +260,7 @@ public interface MessageLite extends MessageLiteOrBuilder {
     /**
      * Parse {@code data} as a message of this type and merge it with the
      * message being built.  This is just a small wrapper around
-     * {@link #mergeFrom(CodedInputStream,ExtensionRegistry)}.
+     * {@link #mergeFrom(CodedInputStream,ExtensionRegistryLite)}.
      *
      * @return this
      */
@@ -293,7 +287,7 @@ public interface MessageLite extends MessageLiteOrBuilder {
     /**
      * Parse a message of this type from {@code input} and merge it with the
      * message being built.  This is just a small wrapper around
-     * {@link #mergeFrom(CodedInputStream,ExtensionRegistry)}.
+     * {@link #mergeFrom(CodedInputStream,ExtensionRegistryLite)}.
      *
      * @return this
      */
@@ -308,9 +302,9 @@ public interface MessageLite extends MessageLiteOrBuilder {
      * {@link MessageLite#writeDelimitedTo(OutputStream)} to write messages in
      * this format.
      *
-     * @returns True if successful, or false if the stream is at EOF when the
-     *          method starts.  Any other error (including reaching EOF during
-     *          parsing) will cause an exception to be thrown.
+     * @return True if successful, or false if the stream is at EOF when the
+     *         method starts.  Any other error (including reaching EOF during
+     *         parsing) will cause an exception to be thrown.
      */
     boolean mergeDelimitedFrom(InputStream input)
                                throws IOException;
diff --git a/java/src/main/java/com/google/protobuf/MessageLiteOrBuilder.java b/java/src/main/java/com/google/protobuf/MessageLiteOrBuilder.java
index 7cc72e9c..05b2b161 100644
--- a/java/src/main/java/com/google/protobuf/MessageLiteOrBuilder.java
+++ b/java/src/main/java/com/google/protobuf/MessageLiteOrBuilder.java
@@ -52,6 +52,8 @@ public interface MessageLiteOrBuilder {
   /**
    * Returns true if all required fields in the message and all embedded
    * messages are set, false otherwise.
+   *
+   * <p>See also: {@link MessageOrBuilder#getInitializationErrorString()}
    */
   boolean isInitialized();
 
diff --git a/java/src/main/java/com/google/protobuf/MessageOrBuilder.java b/java/src/main/java/com/google/protobuf/MessageOrBuilder.java
index 0132e7ca..bf62d45e 100644
--- a/java/src/main/java/com/google/protobuf/MessageOrBuilder.java
+++ b/java/src/main/java/com/google/protobuf/MessageOrBuilder.java
@@ -30,6 +30,7 @@
 
 package com.google.protobuf;
 
+import java.util.List;
 import java.util.Map;
 
 /**
@@ -45,6 +46,24 @@ public interface MessageOrBuilder extends MessageLiteOrBuilder {
   Message getDefaultInstanceForType();
 
   /**
+   * Returns a list of field paths (e.g. "foo.bar.baz") of required fields
+   * which are not set in this message.  You should call
+   * {@link MessageLiteOrBuilder#isInitialized()} first to check if there
+   * are any missing fields, as that method is likely to be much faster
+   * than this one even when the message is fully-initialized.
+   */
+  List<String> findInitializationErrors();
+
+  /**
+   * Returns a comma-delimited list of required fields which are not set
+   * in this message object.  You should call
+   * {@link MessageLiteOrBuilder#isInitialized()} first to check if there
+   * are any missing fields, as that method is likely to be much faster
+   * than this one even when the message is fully-initialized.
+   */
+  String getInitializationErrorString();
+
+  /**
    * Get the message's type's descriptor.  This differs from the
    * {@code getDescriptor()} method of generated message classes in that
    * this method is an abstract method of the {@code Message} interface
@@ -80,7 +99,7 @@ public interface MessageOrBuilder extends MessageLiteOrBuilder {
   /**
    * Obtains the value of the given field, or the default value if it is
    * not set.  For primitive fields, the boxed primitive value is returned.
-   * For enum fields, the EnumValueDescriptor for the value is returend. For
+   * For enum fields, the EnumValueDescriptor for the value is returned. For
    * embedded message fields, the sub-message is returned.  For repeated
    * fields, a java.util.List is returned.
    */
@@ -98,7 +117,7 @@ public interface MessageOrBuilder extends MessageLiteOrBuilder {
   /**
    * Gets an element of a repeated field.  For primitive fields, the boxed
    * primitive value is returned.  For enum fields, the EnumValueDescriptor
-   * for the value is returend. For embedded message fields, the sub-message
+   * for the value is returned. For embedded message fields, the sub-message
    * is returned.
    * @throws IllegalArgumentException The field is not a repeated field, or
    *           {@code field.getContainingType() != getDescriptorForType()}.
diff --git a/java/src/main/java/com/google/protobuf/Parser.java b/java/src/main/java/com/google/protobuf/Parser.java
new file mode 100644
index 00000000..7d8e8217
--- /dev/null
+++ b/java/src/main/java/com/google/protobuf/Parser.java
@@ -0,0 +1,259 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import java.io.InputStream;
+
+/**
+ * Abstract interface for parsing Protocol Messages.
+ *
+ * @author liujisi@google.com (Pherl Liu)
+ */
+public interface Parser<MessageType> {
+  /**
+   * Parses a message of {@code MessageType} from the input.
+   *
+   * <p>Note:  The caller should call
+   * {@link CodedInputStream#checkLastTagWas(int)} after calling this to
+   * verify that the last tag seen was the appropriate end-group tag,
+   * or zero for EOF.
+   */
+  public MessageType parseFrom(CodedInputStream input)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Like {@link #parseFrom(CodedInputStream)}, but also parses extensions.
+   * The extensions that you want to be able to parse must be registered in
+   * {@code extensionRegistry}. Extensions not in the registry will be treated
+   * as unknown fields.
+   */
+  public MessageType parseFrom(CodedInputStream input,
+                               ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Like {@link #parseFrom(CodedInputStream)}, but does not throw an
+   * exception if the message is missing required fields. Instead, a partial
+   * message is returned.
+   */
+  public MessageType parsePartialFrom(CodedInputStream input)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Like {@link #parseFrom(CodedInputStream input, ExtensionRegistryLite)},
+   * but does not throw an exception if the message is missing required fields.
+   * Instead, a partial message is returned.
+   */
+  public MessageType parsePartialFrom(CodedInputStream input,
+                                      ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException;
+
+  // ---------------------------------------------------------------
+  // Convenience methods.
+
+  /**
+   * Parses {@code data} as a message of {@code MessageType}.
+   * This is just a small wrapper around {@link #parseFrom(CodedInputStream)}.
+   */
+  public MessageType parseFrom(ByteString data)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Parses {@code data} as a message of {@code MessageType}.
+   * This is just a small wrapper around
+   * {@link #parseFrom(CodedInputStream, ExtensionRegistryLite)}.
+   */
+  public MessageType parseFrom(ByteString data,
+                               ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Like {@link #parseFrom(ByteString)}, but does not throw an
+   * exception if the message is missing required fields. Instead, a partial
+   * message is returned.
+   */
+  public MessageType parsePartialFrom(ByteString data)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Like {@link #parseFrom(ByteString, ExtensionRegistryLite)},
+   * but does not throw an exception if the message is missing required fields.
+   * Instead, a partial message is returned.
+   */
+  public MessageType parsePartialFrom(ByteString data,
+                                      ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Parses {@code data} as a message of {@code MessageType}.
+   * This is just a small wrapper around {@link #parseFrom(CodedInputStream)}.
+   */
+  public MessageType parseFrom(byte[] data, int off, int len)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Parses {@code data} as a message of {@code MessageType}.
+   * This is just a small wrapper around
+   * {@link #parseFrom(CodedInputStream, ExtensionRegistryLite)}.
+   */
+  public MessageType parseFrom(byte[] data, int off, int len,
+                               ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Parses {@code data} as a message of {@code MessageType}.
+   * This is just a small wrapper around {@link #parseFrom(CodedInputStream)}.
+   */
+  public MessageType parseFrom(byte[] data)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Parses {@code data} as a message of {@code MessageType}.
+   * This is just a small wrapper around
+   * {@link #parseFrom(CodedInputStream, ExtensionRegistryLite)}.
+   */
+  public MessageType parseFrom(byte[] data,
+                               ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Like {@link #parseFrom(byte[], int, int)}, but does not throw an
+   * exception if the message is missing required fields. Instead, a partial
+   * message is returned.
+   */
+  public MessageType parsePartialFrom(byte[] data, int off, int len)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Like {@link #parseFrom(byte[], int, int, ExtensionRegistryLite)},
+   * but does not throw an exception if the message is missing required fields.
+   * Instead, a partial message is returned.
+   */
+  public MessageType parsePartialFrom(byte[] data, int off, int len,
+                                      ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Like {@link #parseFrom(byte[])}, but does not throw an
+   * exception if the message is missing required fields. Instead, a partial
+   * message is returned.
+   */
+  public MessageType parsePartialFrom(byte[] data)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Like {@link #parseFrom(byte[], ExtensionRegistryLite)},
+   * but does not throw an exception if the message is missing required fields.
+   * Instead, a partial message is returned.
+   */
+  public MessageType parsePartialFrom(byte[] data,
+                                      ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Parse a message of {@code MessageType} from {@code input}.
+   * This is just a small wrapper around {@link #parseFrom(CodedInputStream)}.
+   * Note that this method always reads the <i>entire</i> input (unless it
+   * throws an exception).  If you want it to stop earlier, you will need to
+   * wrap your input in some wrapper stream that limits reading.  Or, use
+   * {@link MessageLite#writeDelimitedTo(java.io.OutputStream)} to write your
+   * message and {@link #parseDelimitedFrom(InputStream)} to read it.
+   * <p>
+   * Despite usually reading the entire input, this does not close the stream.
+   */
+  public MessageType parseFrom(InputStream input)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Parses a message of {@code MessageType} from {@code input}.
+   * This is just a small wrapper around
+   * {@link #parseFrom(CodedInputStream, ExtensionRegistryLite)}.
+   */
+  public MessageType parseFrom(InputStream input,
+                               ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Like {@link #parseFrom(InputStream)}, but does not throw an
+   * exception if the message is missing required fields. Instead, a partial
+   * message is returned.
+   */
+  public MessageType parsePartialFrom(InputStream input)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Like {@link #parseFrom(InputStream, ExtensionRegistryLite)},
+   * but does not throw an exception if the message is missing required fields.
+   * Instead, a partial message is returned.
+   */
+  public MessageType parsePartialFrom(InputStream input,
+                                      ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Like {@link #parseFrom(InputStream)}, but does not read until EOF.
+   * Instead, the size of message (encoded as a varint) is read first,
+   * then the message data. Use
+   * {@link MessageLite#writeDelimitedTo(java.io.OutputStream)} to write
+   * messages in this format.
+   *
+   * @return Parsed message if successful, or null if the stream is at EOF
+   *         when the method starts. Any other error (including reaching EOF
+   *         during parsing) will cause an exception to be thrown.
+   */
+  public MessageType parseDelimitedFrom(InputStream input)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Like {@link #parseDelimitedFrom(InputStream)} but supporting extensions.
+   */
+  public MessageType parseDelimitedFrom(InputStream input,
+                                        ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Like {@link #parseDelimitedFrom(InputStream)}, but does not throw an
+   * exception if the message is missing required fields. Instead, a partial
+   * message is returned.
+   */
+  public MessageType parsePartialDelimitedFrom(InputStream input)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Like {@link #parseDelimitedFrom(InputStream, ExtensionRegistryLite)},
+   * but does not throw an exception if the message is missing required fields.
+   * Instead, a partial message is returned.
+   */
+  public MessageType parsePartialDelimitedFrom(
+      InputStream input,
+      ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException;
+}
diff --git a/java/src/main/java/com/google/protobuf/RepeatedFieldBuilder.java b/java/src/main/java/com/google/protobuf/RepeatedFieldBuilder.java
index 0024f791..65d9270d 100644
--- a/java/src/main/java/com/google/protobuf/RepeatedFieldBuilder.java
+++ b/java/src/main/java/com/google/protobuf/RepeatedFieldBuilder.java
@@ -37,22 +37,22 @@ import java.util.Collections;
 import java.util.List;
 
 /**
- * <code>RepeatedFieldBuilder</code> implements a structure that a protocol
+ * {@code RepeatedFieldBuilder} implements a structure that a protocol
  * message uses to hold a repeated field of other protocol messages. It supports
  * the classical use case of adding immutable {@link Message}'s to the
  * repeated field and is highly optimized around this (no extra memory
  * allocations and sharing of immutable arrays).
  * <br>
  * It also supports the additional use case of adding a {@link Message.Builder}
- * to the repeated field and deferring conversion of that <code>Builder</code>
- * to an immutable <code>Message</code>. In this way, it's possible to maintain
- * a tree of <code>Builder</code>'s that acts as a fully read/write data
+ * to the repeated field and deferring conversion of that {@code Builder}
+ * to an immutable {@code Message}. In this way, it's possible to maintain
+ * a tree of {@code Builder}'s that acts as a fully read/write data
  * structure.
  * <br>
  * Logically, one can think of a tree of builders as converting the entire tree
  * to messages when build is called on the root or when any method is called
  * that desires a Message instead of a Builder. In terms of the implementation,
- * the <code>SingleFieldBuilder</code> and <code>RepeatedFieldBuilder</code>
+ * the {@code SingleFieldBuilder} and {@code RepeatedFieldBuilder}
  * classes cache messages that were created so that messages only need to be
  * created when some change occured in its builder or a builder for one of its
  * descendants.
@@ -192,7 +192,7 @@ public class RepeatedFieldBuilder
 
   /**
    * Get the message at the specified index. If the message is currently stored
-   * as a <code>Builder</code>, it is converted to a <code>Message</code> by
+   * as a {@code Builder}, it is converted to a {@code Message} by
    * calling {@link Message.Builder#buildPartial} on it.
    *
    * @param index the index of the message to get
@@ -204,7 +204,7 @@ public class RepeatedFieldBuilder
 
   /**
    * Get the message at the specified index. If the message is currently stored
-   * as a <code>Builder</code>, it is converted to a <code>Message</code> by
+   * as a {@code Builder}, it is converted to a {@code Message} by
    * calling {@link Message.Builder#buildPartial} on it.
    *
    * @param index the index of the message to get
diff --git a/java/src/main/java/com/google/protobuf/RopeByteString.java b/java/src/main/java/com/google/protobuf/RopeByteString.java
new file mode 100644
index 00000000..8d44d117
--- /dev/null
+++ b/java/src/main/java/com/google/protobuf/RopeByteString.java
@@ -0,0 +1,945 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
+import java.io.ByteArrayInputStream;
+import java.nio.ByteBuffer;
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Deque;
+import java.util.Iterator;
+import java.util.List;
+import java.util.NoSuchElementException;
+
+/**
+ * Class to represent {@code ByteStrings} formed by concatenation of other
+ * ByteStrings, without copying the data in the pieces. The concatenation is
+ * represented as a tree whose leaf nodes are each a {@link LiteralByteString}.
+ *
+ * <p>Most of the operations here are inspired by the now-famous paper <a
+ * href="http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf">
+ * BAP95</a>, "Ropes: an Alternative to Strings", by Hans-J. Boehm, Russ
+ * Atkinson and Michael Plass.
+ *
+ * <p>The algorithms described in the paper have been implemented for character
+ * strings in {@code com.google.common.string.Rope} and in the C++ class {@code
+ * cord.cc}.
+ *
+ * <p>Fundamentally the Rope algorithm represents the collection of pieces as a
+ * binary tree. BAP95 uses a Fibonacci bound relating depth to a minimum
+ * sequence length, sequences that are too short relative to their depth cause a
+ * tree rebalance.  More precisely, a tree of depth d is "balanced" in the
+ * terminology of BAP95 if its length is at least F(d+2), where F(n) is the
+ * n-th Fibonacci number. Thus for depths 0, 1, 2, 3, 4, 5,... we have minimum
+ * lengths 1, 2, 3, 5, 8, 13,...
+ *
+ * @author carlanton@google.com (Carl Haverl)
+ */
+class RopeByteString extends ByteString {
+
+  /**
+   * BAP95. Let Fn be the nth Fibonacci number. A {@link RopeByteString} of
+   * depth n is "balanced", i.e flat enough, if its length is at least Fn+2,
+   * e.g. a "balanced" {@link RopeByteString} of depth 1 must have length at
+   * least 2, of depth 4 must have length >= 8, etc.
+   *
+   * <p>There's nothing special about using the Fibonacci numbers for this, but
+   * they are a reasonable sequence for encapsulating the idea that we are OK
+   * with longer strings being encoded in deeper binary trees.
+   *
+   * <p>For 32-bit integers, this array has length 46.
+   */
+  private static final int[] minLengthByDepth;
+
+  static {
+    // Dynamically generate the list of Fibonacci numbers the first time this
+    // class is accessed.
+    List<Integer> numbers = new ArrayList<Integer>();
+
+    // we skip the first Fibonacci number (1).  So instead of: 1 1 2 3 5 8 ...
+    // we have: 1 2 3 5 8 ...
+    int f1 = 1;
+    int f2 = 1;
+
+    // get all the values until we roll over.
+    while (f2 > 0) {
+      numbers.add(f2);
+      int temp = f1 + f2;
+      f1 = f2;
+      f2 = temp;
+    }
+
+    // we include this here so that we can index this array to [x + 1] in the
+    // loops below.
+    numbers.add(Integer.MAX_VALUE);
+    minLengthByDepth = new int[numbers.size()];
+    for (int i = 0; i < minLengthByDepth.length; i++) {
+      // unbox all the values
+      minLengthByDepth[i] = numbers.get(i);
+    }
+  }
+
+  private final int totalLength;
+  private final ByteString left;
+  private final ByteString right;
+  private final int leftLength;
+  private final int treeDepth;
+
+  /**
+   * Create a new RopeByteString, which can be thought of as a new tree node, by
+   * recording references to the two given strings.
+   *
+   * @param left  string on the left of this node, should have {@code size() >
+   *              0}
+   * @param right string on the right of this node, should have {@code size() >
+   *              0}
+   */
+  private RopeByteString(ByteString left, ByteString right) {
+    this.left = left;
+    this.right = right;
+    leftLength = left.size();
+    totalLength = leftLength + right.size();
+    treeDepth = Math.max(left.getTreeDepth(), right.getTreeDepth()) + 1;
+  }
+
+  /**
+   * Concatenate the given strings while performing various optimizations to
+   * slow the growth rate of tree depth and tree node count. The result is
+   * either a {@link LiteralByteString} or a {@link RopeByteString}
+   * depending on which optimizations, if any, were applied.
+   *
+   * <p>Small pieces of length less than {@link
+   * ByteString#CONCATENATE_BY_COPY_SIZE} may be copied by value here, as in
+   * BAP95.  Large pieces are referenced without copy.
+   *
+   * @param left  string on the left
+   * @param right string on the right
+   * @return concatenation representing the same sequence as the given strings
+   */
+  static ByteString concatenate(ByteString left, ByteString right) {
+    ByteString result;
+    RopeByteString leftRope =
+        (left instanceof RopeByteString) ? (RopeByteString) left : null;
+    if (right.size() == 0) {
+      result = left;
+    } else if (left.size() == 0) {
+      result = right;
+    } else {
+      int newLength = left.size() + right.size();
+      if (newLength < ByteString.CONCATENATE_BY_COPY_SIZE) {
+        // Optimization from BAP95: For short (leaves in paper, but just short
+        // here) total length, do a copy of data to a new leaf.
+        result = concatenateBytes(left, right);
+      } else if (leftRope != null
+          && leftRope.right.size() + right.size() < CONCATENATE_BY_COPY_SIZE) {
+        // Optimization from BAP95: As an optimization of the case where the
+        // ByteString is constructed by repeated concatenate, recognize the case
+        // where a short string is concatenated to a left-hand node whose
+        // right-hand branch is short.  In the paper this applies to leaves, but
+        // we just look at the length here. This has the advantage of shedding
+        // references to unneeded data when substrings have been taken.
+        //
+        // When we recognize this case, we do a copy of the data and create a
+        // new parent node so that the depth of the result is the same as the
+        // given left tree.
+        ByteString newRight = concatenateBytes(leftRope.right, right);
+        result = new RopeByteString(leftRope.left, newRight);
+      } else if (leftRope != null
+          && leftRope.left.getTreeDepth() > leftRope.right.getTreeDepth()
+          && leftRope.getTreeDepth() > right.getTreeDepth()) {
+        // Typically for concatenate-built strings the left-side is deeper than
+        // the right.  This is our final attempt to concatenate without
+        // increasing the tree depth.  We'll redo the node on the RHS.  This
+        // is yet another optimization for building the string by repeatedly
+        // concatenating on the right.
+        ByteString newRight = new RopeByteString(leftRope.right, right);
+        result = new RopeByteString(leftRope.left, newRight);
+      } else {
+        // Fine, we'll add a node and increase the tree depth--unless we
+        // rebalance ;^)
+        int newDepth = Math.max(left.getTreeDepth(), right.getTreeDepth()) + 1;
+        if (newLength >= minLengthByDepth[newDepth]) {
+          // The tree is shallow enough, so don't rebalance
+          result = new RopeByteString(left, right);
+        } else {
+          result = new Balancer().balance(left, right);
+        }
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Concatenates two strings by copying data values. This is called in a few
+   * cases in order to reduce the growth of the number of tree nodes.
+   *
+   * @param left  string on the left
+   * @param right string on the right
+   * @return string formed by copying data bytes
+   */
+  private static LiteralByteString concatenateBytes(ByteString left,
+      ByteString right) {
+    int leftSize = left.size();
+    int rightSize = right.size();
+    byte[] bytes = new byte[leftSize + rightSize];
+    left.copyTo(bytes, 0, 0, leftSize);
+    right.copyTo(bytes, 0, leftSize, rightSize);
+    return new LiteralByteString(bytes);  // Constructor wraps bytes
+  }
+
+  /**
+   * Create a new RopeByteString for testing only while bypassing all the
+   * defenses of {@link #concatenate(ByteString, ByteString)}. This allows
+   * testing trees of specific structure. We are also able to insert empty
+   * leaves, though these are dis-allowed, so that we can make sure the
+   * implementation can withstand their presence.
+   *
+   * @param left  string on the left of this node
+   * @param right string on the right of this node
+   * @return an unsafe instance for testing only
+   */
+  static RopeByteString newInstanceForTest(ByteString left, ByteString right) {
+    return new RopeByteString(left, right);
+  }
+
+  /**
+   * Gets the byte at the given index.
+   * Throws {@link ArrayIndexOutOfBoundsException} for backwards-compatibility
+   * reasons although it would more properly be {@link
+   * IndexOutOfBoundsException}.
+   *
+   * @param index index of byte
+   * @return the value
+   * @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size
+   */
+  @Override
+  public byte byteAt(int index) {
+    if (index < 0) {
+      throw new ArrayIndexOutOfBoundsException("Index < 0: " + index);
+    }
+    if (index >= totalLength) {  // index == totalLength is one past the end
+      throw new ArrayIndexOutOfBoundsException(
+          "Index >= length: " + index + ", " + totalLength);
+    }
+
+    byte result;
+    // Find the relevant piece by recursive descent
+    if (index < leftLength) {
+      result = left.byteAt(index);
+    } else {
+      result = right.byteAt(index - leftLength);
+    }
+    return result;
+  }
+
+  @Override
+  public int size() {
+    return totalLength;
+  }
+
+  // =================================================================
+  // Pieces
+
+  @Override
+  protected int getTreeDepth() {
+    return treeDepth;
+  }
+
+  /**
+   * Determines if the tree is balanced according to BAP95, which means the tree
+   * is flat-enough with respect to the bounds. Note that this definition of
+   * balanced is one where sub-trees of balanced trees are not necessarily
+   * balanced.
+   *
+   * @return true if the tree is balanced
+   */
+  @Override
+  protected boolean isBalanced() {
+    return totalLength >= minLengthByDepth[treeDepth];
+  }
+
+  /**
+   * Takes a substring of this one. This involves recursive descent along the
+   * left and right edges of the substring, and referencing any wholly contained
+   * segments in between. Any leaf nodes entirely uninvolved in the substring
+   * will not be referenced by the substring.
+   *
+   * <p>Substrings of {@code length < 2} should result in at most a single
+   * recursive call chain, terminating at a leaf node. Thus the result will be a
+   * {@link LiteralByteString}. Substrings spanning both children are joined
+   * by {@link #RopeByteString(ByteString, ByteString)}.
+   *
+   * @param beginIndex start at this index
+   * @param endIndex   the last character is the one before this index
+   * @return substring leaf node or tree
+   */
+  @Override
+  public ByteString substring(int beginIndex, int endIndex) {
+    if (beginIndex < 0) {
+      throw new IndexOutOfBoundsException(
+          "Beginning index: " + beginIndex + " < 0");
+    }
+    if (endIndex > totalLength) {
+      throw new IndexOutOfBoundsException(
+          "End index: " + endIndex + " > " + totalLength);
+    }
+    int substringLength = endIndex - beginIndex;
+    if (substringLength < 0) {
+      throw new IndexOutOfBoundsException(
+          "Beginning index larger than ending index: " + beginIndex + ", "
+              + endIndex);
+    }
+
+    ByteString result;
+    if (substringLength == 0) {
+      // Empty substring
+      result = ByteString.EMPTY;
+    } else if (substringLength == totalLength) {
+      // The whole string
+      result = this;
+    } else {
+      // Proper substring
+      if (endIndex <= leftLength) {
+        // Substring on the left
+        result = left.substring(beginIndex, endIndex);
+      } else if (beginIndex >= leftLength) {
+        // Substring on the right
+        result = right
+            .substring(beginIndex - leftLength, endIndex - leftLength);
+      } else {
+        // Split substring
+        ByteString leftSub = left.substring(beginIndex);
+        ByteString rightSub = right.substring(0, endIndex - leftLength);
+        // Intentionally not rebalancing, since in many cases these two
+        // substrings will already be less deep than the top-level
+        // RopeByteString we're taking a substring of.
+        result = new RopeByteString(leftSub, rightSub);
+      }
+    }
+    return result;
+  }
+
+  // =================================================================
+  // ByteString -> byte[]
+
+  @Override
+  protected void copyToInternal(byte[] target, int sourceOffset,
+      int targetOffset, int numberToCopy) {
+    if (sourceOffset + numberToCopy <= leftLength) {  // entirely in left
+      left.copyToInternal(target, sourceOffset, targetOffset, numberToCopy);
+    } else if (sourceOffset >= leftLength) {  // entirely in right
+      right.copyToInternal(target, sourceOffset - leftLength, targetOffset,
+          numberToCopy);
+    } else {  // straddles both children: copy the left tail, then the rest
+      int bytesFromLeft = leftLength - sourceOffset;
+      left.copyToInternal(target, sourceOffset, targetOffset, bytesFromLeft);
+      right.copyToInternal(target, 0, targetOffset + bytesFromLeft,
+          numberToCopy - bytesFromLeft);
+    }
+  }
+
+  @Override
+  public void copyTo(ByteBuffer target) {
+    left.copyTo(target);
+    right.copyTo(target);
+  }
+
+  @Override
+  public ByteBuffer asReadOnlyByteBuffer() {
+    ByteBuffer byteBuffer = ByteBuffer.wrap(toByteArray());
+    return byteBuffer.asReadOnlyBuffer();
+  }
+
+  @Override
+  public List<ByteBuffer> asReadOnlyByteBufferList() {
+    // Walk through the list of LiteralByteString's that make up this
+    // rope, and add each one as a read-only ByteBuffer.
+    List<ByteBuffer> result = new ArrayList<ByteBuffer>();
+    PieceIterator pieces = new PieceIterator(this);
+    while (pieces.hasNext()) {
+      LiteralByteString byteString = pieces.next();
+      result.add(byteString.asReadOnlyByteBuffer());
+    }
+    return result;
+  }
+
+  @Override
+  public void writeTo(OutputStream outputStream) throws IOException {
+    left.writeTo(outputStream);
+    right.writeTo(outputStream);
+  }
+
+  @Override
+  public String toString(String charsetName)
+      throws UnsupportedEncodingException {
+    return new String(toByteArray(), charsetName);
+  }
+
+  // =================================================================
+  // UTF-8 decoding
+
+  @Override
+  public boolean isValidUtf8() {
+    int leftPartial = left.partialIsValidUtf8(Utf8.COMPLETE, 0, leftLength);
+    int state = right.partialIsValidUtf8(leftPartial, 0, right.size());
+    return state == Utf8.COMPLETE;
+  }
+
+  @Override
+  protected int partialIsValidUtf8(int state, int offset, int length) {
+    int toIndex = offset + length;
+    if (toIndex <= leftLength) {  // range lies entirely in the left child
+      return left.partialIsValidUtf8(state, offset, length);
+    } else if (offset >= leftLength) {  // range lies entirely in the right
+      return right.partialIsValidUtf8(state, offset - leftLength, length);
+    } else {  // range straddles both children: thread the state through
+      int bytesInLeft = leftLength - offset;
+      int leftPartial = left.partialIsValidUtf8(state, offset, bytesInLeft);
+      return right.partialIsValidUtf8(leftPartial, 0, length - bytesInLeft);
+    }
+  }
+
+  // =================================================================
+  // equals() and hashCode()
+
+  @Override
+  public boolean equals(Object other) {
+    if (other == this) {
+      return true;
+    }
+    if (!(other instanceof ByteString)) {
+      return false;
+    }
+
+    ByteString otherByteString = (ByteString) other;
+    if (totalLength != otherByteString.size()) {
+      return false;
+    }
+    if (totalLength == 0) {
+      return true;
+    }
+
+    // You don't really want to be calling equals on long strings, but since
+    // we cache the hashCode, we effectively cache inequality. We use the cached
+    // hashCode if it's already computed.  It's arguable we should compute the
+    // hashCode here, and if we're going to be testing a bunch of byteStrings,
+    // it might even make sense.
+    if (hash != 0) {
+      int cachedOtherHash = otherByteString.peekCachedHashCode();
+      if (cachedOtherHash != 0 && hash != cachedOtherHash) {
+        return false;
+      }
+    }
+
+    return equalsFragments(otherByteString);
+  }
+
+  /**
+   * Determines if this string is equal to another of the same length by
+   * iterating over the leaf nodes. On each step of the iteration, the
+   * overlapping segments of the leaves are compared.
+   *
+   * @param other string of the same length as this one
+   * @return true if the values of this string equals the value of the given
+   *         one
+   */
+  private boolean equalsFragments(ByteString other) {
+    int thisOffset = 0;
+    Iterator<LiteralByteString> thisIter = new PieceIterator(this);
+    LiteralByteString thisString = thisIter.next();
+
+    int thatOffset = 0;
+    Iterator<LiteralByteString> thatIter = new PieceIterator(other);
+    LiteralByteString thatString = thatIter.next();
+
+    int pos = 0;
+    while (true) {
+      int thisRemaining = thisString.size() - thisOffset;
+      int thatRemaining = thatString.size() - thatOffset;
+      int bytesToCompare = Math.min(thisRemaining, thatRemaining);
+
+      // At least one of the offsets will be zero
+      boolean stillEqual = (thisOffset == 0)
+          ? thisString.equalsRange(thatString, thatOffset, bytesToCompare)
+          : thatString.equalsRange(thisString, thisOffset, bytesToCompare);
+      if (!stillEqual) {
+        return false;
+      }
+
+      pos += bytesToCompare;
+      if (pos >= totalLength) {
+        if (pos == totalLength) {
+          return true;
+        }
+        throw new IllegalStateException();
+      }
+      // We always get to the end of at least one of the pieces
+      if (bytesToCompare == thisRemaining) { // If reached end of this
+        thisOffset = 0;
+        thisString = thisIter.next();
+      } else {
+        thisOffset += bytesToCompare;
+      }
+      if (bytesToCompare == thatRemaining) { // If reached end of that
+        thatOffset = 0;
+        thatString = thatIter.next();
+      } else {
+        thatOffset += bytesToCompare;
+      }
+    }
+  }
+
+  /**
+   * Cached hash value.  Intentionally accessed via a data race, which is safe
+   * because of the Java Memory Model's "no out-of-thin-air values" guarantees
+   * for ints.
+   */
+  private int hash = 0;
+
+  @Override
+  public int hashCode() {
+    int h = hash;
+
+    if (h == 0) {
+      h = totalLength;
+      h = partialHash(h, 0, totalLength);
+      if (h == 0) {
+        h = 1;
+      }
+      hash = h;
+    }
+    return h;
+  }
+
+  @Override
+  protected int peekCachedHashCode() {
+    return hash;
+  }
+
+  @Override
+  protected int partialHash(int h, int offset, int length) {
+    int toIndex = offset + length;
+    if (toIndex <= leftLength) {
+      return left.partialHash(h, offset, length);
+    } else if (offset >= leftLength) {
+      return right.partialHash(h, offset - leftLength, length);
+    } else {
+      int leftLength = this.leftLength - offset;
+      int leftPartial = left.partialHash(h, offset, leftLength);
+      return right.partialHash(leftPartial, 0, length - leftLength);
+    }
+  }
+
+  // =================================================================
+  // Input stream
+
+  @Override
+  public CodedInputStream newCodedInput() {
+    return CodedInputStream.newInstance(new RopeInputStream());
+  }
+
+  @Override
+  public InputStream newInput() {
+    return new RopeInputStream();
+  }
+
+  /**
+   * This class implements the balancing algorithm of BAP95. In the paper the
+   * authors use an array to keep track of pieces, while here we use a stack.
+   * The tree is balanced by traversing subtrees in left to right order, and the
+   * stack always contains the part of the string we've traversed so far.
+   *
+   * <p>One surprising aspect of the algorithm is the result of balancing is not
+   * necessarily balanced, though it is nearly balanced.  For details, see
+   * BAP95.
+   */
+  private static class Balancer {
+    // Stack containing the part of the string, starting from the left, that
+    // we've already traversed.  The final string should be the equivalent of
+    // concatenating the strings on the stack from bottom to top.
+    private final Deque<ByteString> prefixesStack =
+        new ArrayDeque<ByteString>(minLengthByDepth.length);
+
+    private ByteString balance(ByteString left, ByteString right) {
+      doBalance(left);
+      doBalance(right);
+
+      // Sweep stack to gather the result
+      ByteString partialString = prefixesStack.pop();
+      while (!prefixesStack.isEmpty()) {
+        ByteString newLeft = prefixesStack.pop();
+        partialString = new RopeByteString(newLeft, partialString);
+      }
+      // We should end up with a RopeByteString since at a minimum we will
+      // create one from concatenating left and right
+      return partialString;
+    }
+
+    private void doBalance(ByteString root) {
+      // BAP95: Insert balanced subtrees whole. This means the result might not
+      // be balanced, leading to repeated rebalancings on concatenate. However,
+      // these rebalancings are shallow due to ignoring balanced subtrees, and
+      // relatively few calls to insert() result.
+      if (root.isBalanced()) {
+        insert(root);
+      } else if (root instanceof RopeByteString) {
+        RopeByteString rbs = (RopeByteString) root;
+        doBalance(rbs.left);
+        doBalance(rbs.right);
+      } else {
+        throw new IllegalArgumentException(
+            "Has a new type of ByteString been created? Found " +
+                root.getClass());
+      }
+    }
+
+    /**
+     * Push a string on the balance stack (BAP95).  BAP95 uses an array and
+     * calls the elements in the array 'bins'.  We instead use a stack, so the
+     * 'bins' of lengths are represented by differences between the elements of
+     * minLengthByDepth.
+     *
+     * <p>If the length bin for our string, and all shorter length bins, are
+     * empty, we just push it on the stack.  Otherwise, we need to start
+     * concatenating, putting the given string in the "middle" and continuing
+     * until we land in an empty length bin that matches the length of our
+     * concatenation.
+     *
+     * @param byteString string to place on the balance stack
+     */
+    private void insert(ByteString byteString) {
+      int depthBin = getDepthBinForLength(byteString.size());
+      int binEnd = minLengthByDepth[depthBin + 1];
+
+      // BAP95: Concatenate all trees occupying bins representing the length of
+      // our new piece or of shorter pieces, to the extent that is possible.
+      // The goal is to clear the bin which our piece belongs in, but that may
+      // not be entirely possible if there aren't enough longer bins occupied.
+      if (prefixesStack.isEmpty() || prefixesStack.peek().size() >= binEnd) {
+        prefixesStack.push(byteString);
+      } else {
+        int binStart = minLengthByDepth[depthBin];
+
+        // Concatenate the subtrees of shorter length
+        ByteString newTree = prefixesStack.pop();
+        while (!prefixesStack.isEmpty()
+            && prefixesStack.peek().size() < binStart) {
+          ByteString left = prefixesStack.pop();
+          newTree = new RopeByteString(left, newTree);
+        }
+
+        // Concatenate the given string
+        newTree = new RopeByteString(newTree, byteString);
+
+        // Continue concatenating until we land in an empty bin
+        while (!prefixesStack.isEmpty()) {
+          depthBin = getDepthBinForLength(newTree.size());
+          binEnd = minLengthByDepth[depthBin + 1];
+          if (prefixesStack.peek().size() < binEnd) {
+            ByteString left = prefixesStack.pop();
+            newTree = new RopeByteString(left, newTree);
+          } else {
+            break;
+          }
+        }
+        prefixesStack.push(newTree);
+      }
+    }
+
+    private int getDepthBinForLength(int length) {
+      int depth = Arrays.binarySearch(minLengthByDepth, length);
+      if (depth < 0) {
+        // It wasn't an exact match, so convert to the index of the containing
+        // fragment, which is one less than the insertion point.
+        int insertionPoint = -(depth + 1);
+        depth = insertionPoint - 1;
+      }
+
+      return depth;
+    }
+  }
+
+  /**
+   * This class is a continuable tree traversal, which keeps the state
+   * information which would exist on the stack in a recursive traversal instead
+   * on a stack of "Bread Crumbs". The maximum depth of the stack in this
+   * iterator is the same as the depth of the tree being traversed.
+   *
+   * <p>This iterator is used to implement
+   * {@link RopeByteString#equalsFragments(ByteString)}.
+   */
+  private static class PieceIterator implements Iterator<LiteralByteString> {
+
+    private final Deque<RopeByteString> breadCrumbs =
+        new ArrayDeque<RopeByteString>(minLengthByDepth.length);
+    private LiteralByteString next;
+
+    private PieceIterator(ByteString root) {
+      next = getLeafByLeft(root);
+    }
+
+    private LiteralByteString getLeafByLeft(ByteString root) {
+      ByteString pos = root;
+      while (pos instanceof RopeByteString) {
+        RopeByteString rbs = (RopeByteString) pos;
+        breadCrumbs.push(rbs);
+        pos = rbs.left;
+      }
+      return (LiteralByteString) pos;
+    }
+
+    private LiteralByteString getNextNonEmptyLeaf() {
+      while (true) {
+        // Almost always, we go through this loop exactly once.  However, if
+        // we discover an empty string in the rope, we toss it and try again.
+        if (breadCrumbs.isEmpty()) {
+          return null;
+        } else {
+          LiteralByteString result = getLeafByLeft(breadCrumbs.pop().right);
+          if (!result.isEmpty()) {
+            return result;
+          }
+        }
+      }
+    }
+
+    public boolean hasNext() {
+      return next != null;
+    }
+
+    /**
+     * Returns the next item and advances one {@code LiteralByteString}.
+     *
+     * @return next LiteralByteString (throws NoSuchElementException at end)
+     */
+    public LiteralByteString next() {
+      if (next == null) {
+        throw new NoSuchElementException();
+      }
+      LiteralByteString result = next;
+      next = getNextNonEmptyLeaf();
+      return result;
+    }
+
+    public void remove() {
+      throw new UnsupportedOperationException();
+    }
+  }
+
+  // =================================================================
+  // ByteIterator
+
+  @Override
+  public ByteIterator iterator() {
+    return new RopeByteIterator();
+  }
+
+  private class RopeByteIterator implements ByteString.ByteIterator {
+
+    private final PieceIterator pieces;
+    private ByteIterator bytes;
+    int bytesRemaining;
+
+    private RopeByteIterator() {
+      pieces = new PieceIterator(RopeByteString.this);
+      bytes = pieces.next().iterator();
+      bytesRemaining = size();
+    }
+
+    public boolean hasNext() {
+      return (bytesRemaining > 0);
+    }
+
+    public Byte next() {
+      return nextByte(); // Does not instantiate a Byte
+    }
+
+    public byte nextByte() {
+      if (!bytes.hasNext()) {
+        bytes = pieces.next().iterator();
+      }
+      --bytesRemaining;
+      return bytes.nextByte();
+    }
+
+    public void remove() {
+      throw new UnsupportedOperationException();
+    }
+  }
+
+  /**
+   * This class is the {@link RopeByteString} equivalent for
+   * {@link ByteArrayInputStream}.
+   */
+  private class RopeInputStream extends InputStream {
+    // Iterates through the pieces of the rope
+    private PieceIterator pieceIterator;
+    // The current piece
+    private LiteralByteString currentPiece;
+    // The size of the current piece
+    private int currentPieceSize;
+    // The index of the next byte to read in the current piece
+    private int currentPieceIndex;
+    // The offset of the start of the current piece in the rope byte string
+    private int currentPieceOffsetInRope;
+    // Offset in the buffer at which user called mark();
+    private int mark;
+
+    public RopeInputStream() {
+      initialize();
+    }
+
+    @Override
+    public int read(byte b[], int offset, int length)  {
+      if (b == null) {
+        throw new NullPointerException();
+      } else if (offset < 0 || length < 0 || length > b.length - offset) {
+        throw new IndexOutOfBoundsException();
+      }
+      return readSkipInternal(b, offset, length);
+    }
+
+    @Override
+    public long skip(long length) {
+      if (length < 0) {
+        throw new IndexOutOfBoundsException();
+      }
+      int toSkip = (int) Math.min(length, Integer.MAX_VALUE);
+      // skip() must never return a negative count, so map EOF (-1) to 0.
+      return Math.max(0, readSkipInternal(null, 0, toSkip));
+    }
+
+    /**
+     * Internal implementation of read and skip.  If b != null, then read the
+     * next {@code length} bytes into the buffer {@code b} at
+     * offset {@code offset}.  If b == null, then skip the next {@code length}
+     * bytes.
+     * <p>
+     * This method assumes that all error checking has already happened.
+     * <p>
+     * Returns the actual number of bytes read or skipped.
+     */
+    private int readSkipInternal(byte b[], int offset, int length)  {
+      int bytesRemaining = length;
+      while (bytesRemaining > 0) {
+        advanceIfCurrentPieceFullyRead();
+        if (currentPiece == null) {
+          if (bytesRemaining == length) {
+             // We didn't manage to read anything
+             return -1;
+           }
+          break;
+        } else {
+          // Copy the bytes from this piece.
+          int currentPieceRemaining = currentPieceSize - currentPieceIndex;
+          int count = Math.min(currentPieceRemaining, bytesRemaining);
+          if (b != null) {
+            currentPiece.copyTo(b, currentPieceIndex, offset, count);
+            offset += count;
+          }
+          currentPieceIndex += count;
+          bytesRemaining -= count;
+        }
+      }
+       // Return the number of bytes read.
+      return length - bytesRemaining;
+    }
+
+    @Override
+    public int read() throws IOException {
+      advanceIfCurrentPieceFullyRead();
+      if (currentPiece == null) {
+        return -1;
+      } else {
+        return currentPiece.byteAt(currentPieceIndex++) & 0xFF;
+      }
+    }
+
+    @Override
+    public int available() throws IOException {
+      int bytesRead = currentPieceOffsetInRope + currentPieceIndex;
+      return RopeByteString.this.size() - bytesRead;
+    }
+
+    @Override
+    public boolean markSupported() {
+      return true;
+    }
+
+    @Override
+    public void mark(int readAheadLimit) {
+      // Set the mark to our position in the byte string
+      mark = currentPieceOffsetInRope + currentPieceIndex;
+    }
+
+    @Override
+    public synchronized void reset() {
+      // Just reinitialize and skip the specified number of bytes.
+      initialize();
+      readSkipInternal(null, 0, mark);
+    }
+
+    /** Common initialization code used by both the constructor and reset() */
+    private void initialize() {
+      pieceIterator = new PieceIterator(RopeByteString.this);
+      currentPiece = pieceIterator.next();
+      currentPieceSize = currentPiece.size();
+      currentPieceIndex = 0;
+      currentPieceOffsetInRope = 0;
+    }
+
+    /**
+     * Skips to the next piece if we have read all the data in the current
+     * piece.  Sets currentPiece to null if we have reached the end of the
+     * input.
+     */
+    private void advanceIfCurrentPieceFullyRead() {
+      if (currentPiece != null && currentPieceIndex == currentPieceSize) {
+        // One step is enough: PieceIterator skips empty leaves, so the next
+        // piece (if any) always has bytes to read.
+        currentPieceOffsetInRope += currentPieceSize;
+        currentPieceIndex = 0;
+        if (pieceIterator.hasNext()) {
+          currentPiece = pieceIterator.next();
+          currentPieceSize = currentPiece.size();
+        } else {
+          currentPiece = null;
+          currentPieceSize = 0;
+        }
+      }
+    }
+  }
+}
diff --git a/java/src/main/java/com/google/protobuf/SingleFieldBuilder.java b/java/src/main/java/com/google/protobuf/SingleFieldBuilder.java
index d4475f66..4bfc9f34 100644
--- a/java/src/main/java/com/google/protobuf/SingleFieldBuilder.java
+++ b/java/src/main/java/com/google/protobuf/SingleFieldBuilder.java
@@ -31,21 +31,21 @@
 package com.google.protobuf;
 
 /**
- * <code>SingleFieldBuilder</code> implements a structure that a protocol
+ * {@code SingleFieldBuilder} implements a structure that a protocol
  * message uses to hold a single field of another protocol message. It supports
  * the classical use case of setting an immutable {@link Message} as the value
  * of the field and is highly optimized around this.
  * <br>
  * It also supports the additional use case of setting a {@link Message.Builder}
- * as the field and deferring conversion of that <code>Builder</code>
- * to an immutable <code>Message</code>. In this way, it's possible to maintain
- * a tree of <code>Builder</code>'s that acts as a fully read/write data
+ * as the field and deferring conversion of that {@code Builder}
+ * to an immutable {@code Message}. In this way, it's possible to maintain
+ * a tree of {@code Builder}s that acts as a fully read/write data
  * structure.
  * <br>
  * Logically, one can think of a tree of builders as converting the entire tree
  * to messages when build is called on the root or when any method is called
  * that desires a Message instead of a Builder. In terms of the implementation,
- * the <code>SingleFieldBuilder</code> and <code>RepeatedFieldBuilder</code>
+ * the {@code SingleFieldBuilder} and {@code RepeatedFieldBuilder}
  * classes cache messages that were created so that messages only need to be
  * created when some change occured in its builder or a builder for one of its
  * descendants.
@@ -99,7 +99,7 @@ public class SingleFieldBuilder
 
   /**
    * Get the message for the field. If the message is currently stored
-   * as a <code>Builder</code>, it is converted to a <code>Message</code> by
+   * as a {@code Builder}, it is converted to a {@code Message} by
    * calling {@link Message.Builder#buildPartial} on it. If no message has
    * been set, returns the default instance of the message.
    *
diff --git a/java/src/main/java/com/google/protobuf/SmallSortedMap.java b/java/src/main/java/com/google/protobuf/SmallSortedMap.java
index 1cf270f3..c6cad6af 100644
--- a/java/src/main/java/com/google/protobuf/SmallSortedMap.java
+++ b/java/src/main/java/com/google/protobuf/SmallSortedMap.java
@@ -51,14 +51,14 @@ import java.util.SortedMap;
  * remaining entries are stored in an overflow map. Iteration over the entries
  * in the map should be done as follows:
  *
- * <pre>
- * for (int i = 0; i &lt; fieldMap.getNumArrayEntries(); i++) {
+ * <pre>   {@code
+ * for (int i = 0; i < fieldMap.getNumArrayEntries(); i++) {
  *   process(fieldMap.getArrayEntryAt(i));
  * }
- * for (Map.Entry&lt;K, V&gt; entry : fieldMap.getOverflowEntries()) {
+ * for (Map.Entry<K, V> entry : fieldMap.getOverflowEntries()) {
  *   process(entry);
  * }
- * </pre>
+ * }</pre>
  *
  * The resulting iteration is in order of ascending field tag number. The
  * object returned by {@link #entrySet()} adheres to the same contract but is
@@ -394,7 +394,7 @@ class SmallSortedMap<K extends Comparable<K>, V> extends AbstractMap<K, V> {
 
   /**
    * Entry implementation that implements Comparable in order to support
-   * binary search witin the entry array. Also checks mutability in
+   * binary search within the entry array. Also checks mutability in
    * {@link #setValue()}.
    */
   private class Entry implements Map.Entry<K, V>, Comparable<Entry> {
diff --git a/java/src/main/java/com/google/protobuf/TextFormat.java b/java/src/main/java/com/google/protobuf/TextFormat.java
index d5fbdabf..ed462899 100644
--- a/java/src/main/java/com/google/protobuf/TextFormat.java
+++ b/java/src/main/java/com/google/protobuf/TextFormat.java
@@ -55,15 +55,18 @@ import java.util.regex.Pattern;
 public final class TextFormat {
   private TextFormat() {}
 
-  private static final Printer DEFAULT_PRINTER = new Printer(false);
-  private static final Printer SINGLE_LINE_PRINTER = new Printer(true);
+  private static final Printer DEFAULT_PRINTER = new Printer();
+  private static final Printer SINGLE_LINE_PRINTER =
+      (new Printer()).setSingleLineMode(true);
+  private static final Printer UNICODE_PRINTER =
+      (new Printer()).setEscapeNonAscii(false);
 
   /**
    * Outputs a textual representation of the Protocol Message supplied into
    * the parameter output. (This representation is the new version of the
    * classic "ProtocolPrinter" output from the original Protocol Buffer system)
    */
-  public static void print(final Message message, final Appendable output)
+  public static void print(final MessageOrBuilder message, final Appendable output)
                            throws IOException {
     DEFAULT_PRINTER.print(message, new TextGenerator(output));
   }
@@ -79,7 +82,7 @@ public final class TextFormat {
    * Generates a human readable form of this message, useful for debugging and
    * other purposes, with no newline characters.
    */
-  public static String shortDebugString(final Message message) {
+  public static String shortDebugString(final MessageOrBuilder message) {
     try {
       final StringBuilder sb = new StringBuilder();
       SINGLE_LINE_PRINTER.print(message, new TextGenerator(sb));
@@ -109,7 +112,7 @@ public final class TextFormat {
    * Like {@code print()}, but writes directly to a {@code String} and
    * returns it.
    */
-  public static String printToString(final Message message) {
+  public static String printToString(final MessageOrBuilder message) {
     try {
       final StringBuilder text = new StringBuilder();
       print(message, text);
@@ -133,6 +136,34 @@ public final class TextFormat {
     }
   }
 
+  /**
+   * Same as {@code printToString()}, except that non-ASCII characters
+   * in string type fields are not escaped as backslash-octal sequences.
+   */
+  public static String printToUnicodeString(final MessageOrBuilder message) {
+    try {
+      final StringBuilder text = new StringBuilder();
+      UNICODE_PRINTER.print(message, new TextGenerator(text));
+      return text.toString();
+    } catch (IOException e) {
+      throw new IllegalStateException(e);
+    }
+  }
+
+  /**
+   * Same as {@code printToString()}, except that non-ASCII characters
+   * in string type fields are not escaped as backslash-octal sequences.
+   */
+  public static String printToUnicodeString(final UnknownFieldSet fields) {
+    try {
+      final StringBuilder text = new StringBuilder();
+      UNICODE_PRINTER.printUnknownFields(fields, new TextGenerator(text));
+      return text.toString();
+    } catch (IOException e) {
+      throw new IllegalStateException(e);
+    }
+  }
+
   public static void printField(final FieldDescriptor field,
                                 final Object value,
                                 final Appendable output)
@@ -216,13 +247,26 @@ public final class TextFormat {
   /** Helper class for converting protobufs to text. */
   private static final class Printer {
     /** Whether to omit newlines from the output. */
-    final boolean singleLineMode;
+    boolean singleLineMode = false;
+
+    /** Whether to escape non ASCII characters with backslash and octal. */
+    boolean escapeNonAscii = true;
+
+    private Printer() {}
 
-    private Printer(final boolean singleLineMode) {
+    /** Setter of singleLineMode */
+    private Printer setSingleLineMode(boolean singleLineMode) {
       this.singleLineMode = singleLineMode;
+      return this;
+    }
+
+    /** Setter of escapeNonAscii */
+    private Printer setEscapeNonAscii(boolean escapeNonAscii) {
+      this.escapeNonAscii = escapeNonAscii;
+      return this;
     }
 
-    private void print(final Message message, final TextGenerator generator)
+    private void print(final MessageOrBuilder message, final TextGenerator generator)
         throws IOException {
       for (Map.Entry<FieldDescriptor, Object> field
           : message.getAllFields().entrySet()) {
@@ -339,7 +383,9 @@ public final class TextFormat {
 
         case STRING:
           generator.print("\"");
-          generator.print(escapeText((String) value));
+          generator.print(escapeNonAscii ?
+              escapeText((String) value) :
+              (String) value);
           generator.print("\"");
           break;
 
@@ -541,7 +587,7 @@ public final class TextFormat {
     private int previousLine = 0;
     private int previousColumn = 0;
 
-    // We use possesive quantifiers (*+ and ++) because otherwise the Java
+    // We use possessive quantifiers (*+ and ++) because otherwise the Java
     // regex matcher has stack overflows on large inputs.
     private static final Pattern WHITESPACE =
       Pattern.compile("(\\s|(#.*$))++", Pattern.MULTILINE);
@@ -864,7 +910,7 @@ public final class TextFormat {
     public ParseException parseException(final String description) {
       // Note:  People generally prefer one-based line and column numbers.
       return new ParseException(
-        (line + 1) + ":" + (column + 1) + ": " + description);
+        line + 1, column + 1, description);
     }
 
     /**
@@ -875,7 +921,7 @@ public final class TextFormat {
         final String description) {
       // Note:  People generally prefer one-based line and column numbers.
       return new ParseException(
-        (previousLine + 1) + ":" + (previousColumn + 1) + ": " + description);
+        previousLine + 1, previousColumn + 1, description);
     }
 
     /**
@@ -900,8 +946,45 @@ public final class TextFormat {
   public static class ParseException extends IOException {
     private static final long serialVersionUID = 3196188060225107702L;
 
+    private final int line;
+    private final int column;
+
+    /** Create a new instance, with -1 as the line and column numbers. */
     public ParseException(final String message) {
-      super(message);
+      this(-1, -1, message);
+    }
+
+    /**
+     * Create a new instance
+     *
+     * @param line the line number where the parse error occurred,
+     * using 1-offset.
+     * @param column the column number where the parse error occurred,
+     * using 1-offset.
+     */
+    public ParseException(final int line, final int column,
+        final String message) {
+      super(Integer.toString(line) + ":" + column + ": " + message);
+      this.line = line;
+      this.column = column;
+    }
+
+    /**
+     * Return the line where the parse exception occurred, or -1 when
+     * none is provided. The value is specified as 1-offset, so the first
+     * line is line 1.
+     */
+    public int getLine() {
+      return line;
+    }
+
+    /**
+     * Return the column where the parse exception occurred, or -1 when
+     * none is provided. The value is specified as 1-offset, so the first
+     * line is line 1.
+     */
+    public int getColumn() {
+      return column;
     }
   }
 
@@ -1073,7 +1156,7 @@ public final class TextFormat {
         mergeField(tokenizer, extensionRegistry, subBuilder);
       }
 
-      value = subBuilder.build();
+      value = subBuilder.buildPartial();
 
     } else {
       tokenizer.consume(":");
@@ -1212,7 +1295,7 @@ public final class TextFormat {
    */
   static ByteString unescapeBytes(final CharSequence charString)
       throws InvalidEscapeSequenceException {
-    // First convert the Java characater sequence to UTF-8 bytes.
+    // First convert the Java character sequence to UTF-8 bytes.
     ByteString input = ByteString.copyFromUtf8(charString.toString());
     // Then unescape certain byte sequences introduced by ASCII '\\'.  The valid
     // escapes can all be expressed with ASCII characters, so it is safe to
@@ -1349,7 +1432,7 @@ public final class TextFormat {
   /**
    * Parse a 32-bit signed integer from the text.  Unlike the Java standard
    * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
-   * and "0" to signify hexidecimal and octal numbers, respectively.
+   * and "0" to signify hexadecimal and octal numbers, respectively.
    */
   static int parseInt32(final String text) throws NumberFormatException {
     return (int) parseInteger(text, true, false);
@@ -1358,7 +1441,7 @@ public final class TextFormat {
   /**
    * Parse a 32-bit unsigned integer from the text.  Unlike the Java standard
    * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
-   * and "0" to signify hexidecimal and octal numbers, respectively.  The
+   * and "0" to signify hexadecimal and octal numbers, respectively.  The
    * result is coerced to a (signed) {@code int} when returned since Java has
    * no unsigned integer type.
    */
@@ -1369,7 +1452,7 @@ public final class TextFormat {
   /**
    * Parse a 64-bit signed integer from the text.  Unlike the Java standard
    * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
-   * and "0" to signify hexidecimal and octal numbers, respectively.
+   * and "0" to signify hexadecimal and octal numbers, respectively.
    */
   static long parseInt64(final String text) throws NumberFormatException {
     return parseInteger(text, true, true);
@@ -1378,7 +1461,7 @@ public final class TextFormat {
   /**
    * Parse a 64-bit unsigned integer from the text.  Unlike the Java standard
    * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
-   * and "0" to signify hexidecimal and octal numbers, respectively.  The
+   * and "0" to signify hexadecimal and octal numbers, respectively.  The
    * result is coerced to a (signed) {@code long} when returned since Java has
    * no unsigned long type.
    */
diff --git a/java/src/main/java/com/google/protobuf/UnknownFieldSet.java b/java/src/main/java/com/google/protobuf/UnknownFieldSet.java
index 26a15d00..45e2e6e4 100644
--- a/java/src/main/java/com/google/protobuf/UnknownFieldSet.java
+++ b/java/src/main/java/com/google/protobuf/UnknownFieldSet.java
@@ -46,7 +46,7 @@ import java.util.TreeMap;
  * {@code UnknownFieldSet} is used to keep track of fields which were seen when
  * parsing a protocol message but whose field numbers or types are unrecognized.
  * This most frequently occurs when new fields are added to a message type
- * and then messages containing those feilds are read by old software that was
+ * and then messages containing those fields are read by old software that was
  * compiled before the new types were added.
  *
  * <p>Every {@link Message} contains an {@code UnknownFieldSet} (and every
@@ -468,7 +468,7 @@ public final class UnknownFieldSet implements MessageLite {
     /**
      * Parse a single field from {@code input} and merge it into this set.
      * @param tag The field's tag number, which was already parsed.
-     * @return {@code false} if the tag is an engroup tag.
+     * @return {@code false} if the tag is an end group tag.
      */
     public boolean mergeFieldFrom(final int tag, final CodedInputStream input)
                                   throws IOException {
@@ -950,4 +950,29 @@ public final class UnknownFieldSet implements MessageLite {
       }
     }
   }
+
+  /**
+   * Parser to implement MessageLite interface.
+   */
+  public static final class Parser extends AbstractParser<UnknownFieldSet> {
+    public UnknownFieldSet parsePartialFrom(
+        CodedInputStream input, ExtensionRegistryLite extensionRegistry)
+        throws InvalidProtocolBufferException {
+      Builder builder = newBuilder();
+      try {
+        builder.mergeFrom(input);
+      } catch (InvalidProtocolBufferException e) {
+        throw e.setUnfinishedMessage(builder.buildPartial());
+      } catch (IOException e) {
+        throw new InvalidProtocolBufferException(e.getMessage())
+            .setUnfinishedMessage(builder.buildPartial());
+      }
+      return builder.buildPartial();
+    }
+  }
+
+  private static final Parser PARSER = new Parser();
+  public final Parser getParserForType() {
+    return PARSER;
+  }
 }
diff --git a/java/src/main/java/com/google/protobuf/UnmodifiableLazyStringList.java b/java/src/main/java/com/google/protobuf/UnmodifiableLazyStringList.java
index 83e5c796..f80f0968 100644
--- a/java/src/main/java/com/google/protobuf/UnmodifiableLazyStringList.java
+++ b/java/src/main/java/com/google/protobuf/UnmodifiableLazyStringList.java
@@ -32,6 +32,7 @@ package com.google.protobuf;
 
 import java.util.AbstractList;
 import java.util.RandomAccess;
+import java.util.List;
 import java.util.ListIterator;
 import java.util.Iterator;
 
@@ -143,4 +144,10 @@ public class UnmodifiableLazyStringList extends AbstractList<String>
       }
     };
   }
+
+  @Override
+  public List<?> getUnderlyingElements() {
+    // The returned value is already unmodifiable.
+    return list.getUnderlyingElements();
+  }
 }
diff --git a/java/src/main/java/com/google/protobuf/Utf8.java b/java/src/main/java/com/google/protobuf/Utf8.java
new file mode 100644
index 00000000..388f7fc5
--- /dev/null
+++ b/java/src/main/java/com/google/protobuf/Utf8.java
@@ -0,0 +1,349 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+/**
+ * A set of low-level, high-performance static utility methods related
+ * to the UTF-8 character encoding.  This class has no dependencies
+ * outside of the core JDK libraries.
+ *
+ * <p>There are several variants of UTF-8.  The one implemented by
+ * this class is the restricted definition of UTF-8 introduced in
+ * Unicode 3.1, which mandates the rejection of "overlong" byte
+ * sequences as well as rejection of 3-byte surrogate codepoint byte
+ * sequences.  Note that the UTF-8 decoder included in Oracle's JDK
+ * has been modified to also reject "overlong" byte sequences, but (as
+ * of 2011) still accepts 3-byte surrogate codepoint byte sequences.
+ *
+ * <p>The byte sequences considered valid by this class are exactly
+ * those that can be roundtrip converted to Strings and back to bytes
+ * using the UTF-8 charset, without loss: <pre> {@code
+ * Arrays.equals(bytes, new String(bytes, "UTF-8").getBytes("UTF-8"))
+ * }</pre>
+ *
+ * <p>See the Unicode Standard,<br>
+ * Table 3-6. <em>UTF-8 Bit Distribution</em>,<br>
+ * Table 3-7. <em>Well-Formed UTF-8 Byte Sequences</em>.
+ *
+ * <p>This class supports decoding of partial byte sequences, so that the
+ * bytes of a complete UTF-8 byte sequence can be stored in multiple
+ * segments.  Methods typically return {@link #MALFORMED} if the partial
+ * byte sequence is definitely not well-formed; {@link #COMPLETE} if it is
+ * well-formed in the absence of additional input; or, if the byte sequence
+ * apparently terminated in the middle of a character, an opaque integer
+ * "state" value containing enough information to decode the character when
+ * passed to a subsequent invocation of a partial decoding method.
+ *
+ * @author martinrb@google.com (Martin Buchholz)
+ */
+final class Utf8 {
+  private Utf8() {}
+
+  /**
+   * State value indicating that the byte sequence is well-formed and
+   * complete (no further bytes are needed to complete a character).
+   */
+  public static final int COMPLETE = 0;
+
+  /**
+   * State value indicating that the byte sequence is definitely not
+   * well-formed.
+   */
+  public static final int MALFORMED = -1;
+
+  // Other state values include the partial bytes of the incomplete
+  // character to be decoded in the simplest way: we pack the bytes
+  // into the state int in little-endian order.  For example:
+  //
+  // int state = byte1 ^ (byte2 << 8) ^ (byte3 << 16);
+  //
+  // Such a state is unpacked thus (note the ~ operation for byte2 to
+  // undo byte1's sign-extension bits):
+  //
+  // int byte1 = (byte) state;
+  // int byte2 = (byte) ~(state >> 8);
+  // int byte3 = (byte) (state >> 16);
+  //
+  // We cannot store a zero byte in the state because it would be
+  // indistinguishable from the absence of a byte.  But we don't need
+  // to, because partial bytes must always be negative.  When building
+  // a state, we ensure that byte1 is negative and subsequent bytes
+  // are valid trailing bytes.
+
+  /**
+   * Returns {@code true} if the given byte array is a well-formed
+   * UTF-8 byte sequence.
+   *
+   * <p>This is a convenience method, equivalent to a call to {@code
+   * isValidUtf8(bytes, 0, bytes.length)}.
+   */
+  public static boolean isValidUtf8(byte[] bytes) {
+    return isValidUtf8(bytes, 0, bytes.length);
+  }
+
+  /**
+   * Returns {@code true} if the given byte array slice is a
+   * well-formed UTF-8 byte sequence.  The range of bytes to be
+   * checked extends from index {@code index}, inclusive, to {@code
+   * limit}, exclusive.
+   *
+   * <p>This is a convenience method, equivalent to {@code
+   * partialIsValidUtf8(bytes, index, limit) == Utf8.COMPLETE}.
+   */
+  public static boolean isValidUtf8(byte[] bytes, int index, int limit) {
+    return partialIsValidUtf8(bytes, index, limit) == COMPLETE;
+  }
+
+  /**
+   * Tells whether the given byte array slice is a well-formed,
+   * malformed, or incomplete UTF-8 byte sequence.  The range of bytes
+   * to be checked extends from index {@code index}, inclusive, to
+   * {@code limit}, exclusive.
+   *
+   * @param state either {@link Utf8#COMPLETE} (if this is the initial decoding
+   * operation) or the value returned from a call to a partial decoding method
+   * for the previous bytes
+   *
+   * @return {@link #MALFORMED} if the partial byte sequence is
+   * definitely not well-formed; {@link #COMPLETE} if it is well-formed
+   * (no additional input needed); or, if the byte sequence is
+   * "incomplete", i.e. apparently terminated in the middle of a character,
+   * an opaque integer "state" value containing enough information to
+   * decode the character when passed to a subsequent invocation of a
+   * partial decoding method.
+   */
+  public static int partialIsValidUtf8(
+      int state, byte[] bytes, int index, int limit) {
+    if (state != COMPLETE) {
+      // The previous decoding operation was incomplete (or malformed).
+      // We look for a well-formed sequence consisting of bytes from
+      // the previous decoding operation (stored in state) together
+      // with bytes from the array slice.
+      //
+      // We expect such "straddler characters" to be rare.
+
+      if (index >= limit) {  // No bytes? No progress.
+        return state;
+      }
+      int byte1 = (byte) state;
+      // byte1 is never ASCII.
+      if (byte1 < (byte) 0xE0) {
+        // two-byte form
+
+        // Simultaneously checks for illegal trailing-byte in
+        // leading position and overlong 2-byte form.
+        if (byte1 < (byte) 0xC2 ||
+            // byte2 trailing-byte test
+            bytes[index++] > (byte) 0xBF) {
+          return MALFORMED;
+        }
+      } else if (byte1 < (byte) 0xF0) {
+        // three-byte form
+
+        // Get byte2 from saved state or array
+        int byte2 = (byte) ~(state >> 8);
+        if (byte2 == 0) {
+          byte2 = bytes[index++];
+          if (index >= limit) {
+            return incompleteStateFor(byte1, byte2);
+          }
+        }
+        if (byte2 > (byte) 0xBF ||
+            // overlong? 5 most significant bits must not all be zero
+            (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0) ||
+            // illegal surrogate codepoint?
+            (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0) ||
+            // byte3 trailing-byte test
+            bytes[index++] > (byte) 0xBF) {
+          return MALFORMED;
+        }
+      } else {
+        // four-byte form
+
+        // Get byte2 and byte3 from saved state or array
+        int byte2 = (byte) ~(state >> 8);
+        int byte3 = 0;
+        if (byte2 == 0) {
+          byte2 = bytes[index++];
+          if (index >= limit) {
+            return incompleteStateFor(byte1, byte2);
+          }
+        } else {
+          byte3 = (byte) (state >> 16);
+        }
+        if (byte3 == 0) {
+          byte3 = bytes[index++];
+          if (index >= limit) {
+            return incompleteStateFor(byte1, byte2, byte3);
+          }
+        }
+
+        // If we were called with state == MALFORMED, then byte1 is 0xFF,
+        // which never occurs in well-formed UTF-8, and so we will return
+        // MALFORMED again below.
+
+        if (byte2 > (byte) 0xBF ||
+            // Check that 1 <= plane <= 16.  Tricky optimized form of:
+            // if (byte1 > (byte) 0xF4 ||
+            //     byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
+            //     byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
+            (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0 ||
+            // byte3 trailing-byte test
+            byte3 > (byte) 0xBF ||
+            // byte4 trailing-byte test
+             bytes[index++] > (byte) 0xBF) {
+          return MALFORMED;
+        }
+      }
+    }
+
+    return partialIsValidUtf8(bytes, index, limit);
+  }
+
+  /**
+   * Tells whether the given byte array slice is a well-formed,
+   * malformed, or incomplete UTF-8 byte sequence.  The range of bytes
+   * to be checked extends from index {@code index}, inclusive, to
+   * {@code limit}, exclusive.
+   *
+   * <p>This is a convenience method, equivalent to a call to {@code
+   * partialIsValidUtf8(Utf8.COMPLETE, bytes, index, limit)}.
+   *
+   * @return {@link #MALFORMED} if the partial byte sequence is
+   * definitely not well-formed; {@link #COMPLETE} if it is well-formed
+   * (no additional input needed); or, if the byte sequence is
+   * "incomplete", i.e. apparently terminated in the middle of a character,
+   * an opaque integer "state" value containing enough information to
+   * decode the character when passed to a subsequent invocation of a
+   * partial decoding method.
+   */
+  public static int partialIsValidUtf8(
+      byte[] bytes, int index, int limit) {
+    // Optimize for 100% ASCII.
+    // Hotspot loves small simple top-level loops like this.
+    while (index < limit && bytes[index] >= 0) {
+      index++;
+    }
+
+    return (index >= limit) ? COMPLETE :
+        partialIsValidUtf8NonAscii(bytes, index, limit);
+  }
+
+  private static int partialIsValidUtf8NonAscii(
+      byte[] bytes, int index, int limit) {
+    for (;;) {
+      int byte1, byte2;
+
+      // Optimize for interior runs of ASCII bytes.
+      do {
+        if (index >= limit) {
+          return COMPLETE;
+        }
+      } while ((byte1 = bytes[index++]) >= 0);
+
+      if (byte1 < (byte) 0xE0) {
+        // two-byte form
+
+        if (index >= limit) {
+          return byte1;
+        }
+
+        // Simultaneously checks for illegal trailing-byte in
+        // leading position and overlong 2-byte form.
+        if (byte1 < (byte) 0xC2 ||
+            bytes[index++] > (byte) 0xBF) {
+          return MALFORMED;
+        }
+      } else if (byte1 < (byte) 0xF0) {
+        // three-byte form
+
+        if (index >= limit - 1) { // incomplete sequence
+          return incompleteStateFor(bytes, index, limit);
+        }
+        if ((byte2 = bytes[index++]) > (byte) 0xBF ||
+            // overlong? 5 most significant bits must not all be zero
+            (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0) ||
+            // check for illegal surrogate codepoints
+            (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0) ||
+            // byte3 trailing-byte test
+            bytes[index++] > (byte) 0xBF) {
+          return MALFORMED;
+        }
+      } else {
+        // four-byte form
+
+        if (index >= limit - 2) {  // incomplete sequence
+          return incompleteStateFor(bytes, index, limit);
+        }
+        if ((byte2 = bytes[index++]) > (byte) 0xBF ||
+            // Check that 1 <= plane <= 16.  Tricky optimized form of:
+            // if (byte1 > (byte) 0xF4 ||
+            //     byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
+            //     byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
+            (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0 ||
+            // byte3 trailing-byte test
+            bytes[index++] > (byte) 0xBF ||
+            // byte4 trailing-byte test
+            bytes[index++] > (byte) 0xBF) {
+          return MALFORMED;
+        }
+      }
+    }
+  }
+
+  private static int incompleteStateFor(int byte1) {
+    return (byte1 > (byte) 0xF4) ?
+        MALFORMED : byte1;
+  }
+
+  private static int incompleteStateFor(int byte1, int byte2) {
+    return (byte1 > (byte) 0xF4 ||
+            byte2 > (byte) 0xBF) ?
+        MALFORMED : byte1 ^ (byte2 << 8);
+  }
+
+  private static int incompleteStateFor(int byte1, int byte2, int byte3) {
+    return (byte1 > (byte) 0xF4 ||
+            byte2 > (byte) 0xBF ||
+            byte3 > (byte) 0xBF) ?
+        MALFORMED : byte1 ^ (byte2 << 8) ^ (byte3 << 16);
+  }
+
+  private static int incompleteStateFor(byte[] bytes, int index, int limit) {
+    int byte1 = bytes[index - 1];
+    switch (limit - index) {
+      case 0: return incompleteStateFor(byte1);
+      case 1: return incompleteStateFor(byte1, bytes[index]);
+      case 2: return incompleteStateFor(byte1, bytes[index], bytes[index + 1]);
+      default: throw new AssertionError();
+    }
+  }
+}
diff --git a/java/src/main/java/com/google/protobuf/WireFormat.java b/java/src/main/java/com/google/protobuf/WireFormat.java
index a30f2a3c..dd2d6310 100644
--- a/java/src/main/java/com/google/protobuf/WireFormat.java
+++ b/java/src/main/java/com/google/protobuf/WireFormat.java
@@ -146,7 +146,7 @@ public final class WireFormat {
     public boolean isPackable() { return true; }
   }
 
-  // Field numbers for feilds in MessageSet wire format.
+  // Field numbers for fields in MessageSet wire format.
   static final int MESSAGE_SET_ITEM    = 1;
   static final int MESSAGE_SET_TYPE_ID = 2;
   static final int MESSAGE_SET_MESSAGE = 3;
author	xiaofeng@google.com <xiaofeng@google.com@630680e5-0e50-0410-840e-4b1c322b438d>	2012-09-22 02:40:50 +0000
committer	xiaofeng@google.com <xiaofeng@google.com@630680e5-0e50-0410-840e-4b1c322b438d>	2012-09-22 02:40:50 +0000
commit	b55a20fa2c669b181f47ea9219b8e74d1263da19 (patch)
tree	3936a0e7c22196587a6d8397372de41434fe2129 /java/src/main
parent	9ced30caf94bb4e7e9629c199679ff44e8ca7389 (diff)
download	protobuf-b55a20fa2c669b181f47ea9219b8e74d1263da19.tar.gz protobuf-b55a20fa2c669b181f47ea9219b8e74d1263da19.tar.bz2 protobuf-b55a20fa2c669b181f47ea9219b8e74d1263da19.zip