diff options
author | kenton@google.com <kenton@google.com@630680e5-0e50-0410-840e-4b1c322b438d> | 2008-11-21 00:06:27 +0000 |
---|---|---|
committer | kenton@google.com <kenton@google.com@630680e5-0e50-0410-840e-4b1c322b438d> | 2008-11-21 00:06:27 +0000 |
commit | 26bd9eee6ee6d116e1cc0dedeb660cd69d7aac45 (patch) | |
tree | d35cca89e0da44f136090a554ff9abc93a794fa8 | |
parent | a2a32c20434807e9966e3f48375f9419134d1b55 (diff) | |
download | protobuf-26bd9eee6ee6d116e1cc0dedeb660cd69d7aac45.tar.gz protobuf-26bd9eee6ee6d116e1cc0dedeb660cd69d7aac45.tar.bz2 protobuf-26bd9eee6ee6d116e1cc0dedeb660cd69d7aac45.zip |
Integrate changes from internal code.
protoc
* Enum values may now have custom options, using syntax similar to field
options.
* Fixed bug where .proto files which use custom options but don't actually
define them (i.e. they import another .proto file defining the options)
had to explicitly import descriptor.proto.
* Adjacent string literals in .proto files will now be concatenated, like in
C.
C++
* Generated message classes now have a Swap() method which efficiently swaps
the contents of two objects.
* All message classes now have a SpaceUsed() method which returns an estimate
of the number of bytes of allocated memory currently owned by the object.
This is particularly useful when you are reusing a single message object
to improve performance but want to make sure it doesn't bloat up too large.
* New method Message::SerializeAsString() returns a string containing the
serialized data. May be more convenient than calling
SerializeToString(string*).
* In debug mode, log error messages when string-type fields are found to
contain bytes that are not valid UTF-8.
* Fixed bug where a message with multiple extension ranges couldn't parse
extensions.
* Fixed bug where MergeFrom(const Message&) didn't do anything if invoked on
a message that contained no fields (but possibly contained extensions).
* Fixed ShortDebugString() so that it is no longer O(n^2).
* Fixed crash in TextFormat parsing if the first token in the input caused a
tokenization error.
Java
* New overload of mergeFrom() which parses a slice of a byte array (given as
an offset and a length) instead of the whole array.
* New method ByteString.asReadOnlyByteBuffer() returns a read-only java.nio.ByteBuffer that shares the ByteString's backing byte array.
* Improved performance of isInitialized() when optimizing for code size.
Python
* Corrected ListFields() signature in Message base class to match what
subclasses actually implement.
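* Some minor refactoring (the repeated-field container classes moved from reflection.py into the new internal/containers.py module).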
76 files changed, 2461 insertions, 238 deletions
diff --git a/Makefile.am b/Makefile.am index 52f2caf5..c3d40b1f 100644 --- a/Makefile.am +++ b/Makefile.am @@ -70,6 +70,7 @@ EXTRA_DIST = \ java/pom.xml \ java/README.txt \ python/google/protobuf/internal/generator_test.py \ + python/google/protobuf/internal/containers.py \ python/google/protobuf/internal/decoder.py \ python/google/protobuf/internal/decoder_test.py \ python/google/protobuf/internal/descriptor_test.py \ diff --git a/java/src/main/java/com/google/protobuf/AbstractMessage.java b/java/src/main/java/com/google/protobuf/AbstractMessage.java index 51c6e783..2f61859d 100644 --- a/java/src/main/java/com/google/protobuf/AbstractMessage.java +++ b/java/src/main/java/com/google/protobuf/AbstractMessage.java @@ -77,6 +77,7 @@ public abstract class AbstractMessage implements Message { return true; } + @Override public final String toString() { return TextFormat.printToString(this); } @@ -199,6 +200,7 @@ public abstract class AbstractMessage implements Message { public static abstract class Builder<BuilderType extends Builder> implements Message.Builder { // The compiler produces an error if this is not declared explicitly. 
+ @Override public abstract BuilderType clone(); public BuilderType clear() { @@ -307,8 +309,13 @@ public abstract class AbstractMessage implements Message { public BuilderType mergeFrom(byte[] data) throws InvalidProtocolBufferException { + return mergeFrom(data, 0, data.length); + } + + public BuilderType mergeFrom(byte[] data, int off, int len) + throws InvalidProtocolBufferException { try { - CodedInputStream input = CodedInputStream.newInstance(data); + CodedInputStream input = CodedInputStream.newInstance(data, off, len); mergeFrom(input); input.checkLastTagWas(0); return (BuilderType) this; @@ -322,10 +329,18 @@ public abstract class AbstractMessage implements Message { } public BuilderType mergeFrom( - byte[] data, ExtensionRegistry extensionRegistry) + byte[] data, + ExtensionRegistry extensionRegistry) + throws InvalidProtocolBufferException { + return mergeFrom(data, 0, data.length, extensionRegistry); + } + + public BuilderType mergeFrom( + byte[] data, int off, int len, + ExtensionRegistry extensionRegistry) throws InvalidProtocolBufferException { try { - CodedInputStream input = CodedInputStream.newInstance(data); + CodedInputStream input = CodedInputStream.newInstance(data, off, len); mergeFrom(input, extensionRegistry); input.checkLastTagWas(0); return (BuilderType) this; diff --git a/java/src/main/java/com/google/protobuf/ByteString.java b/java/src/main/java/com/google/protobuf/ByteString.java index 9814dfc1..f376e7a1 100644 --- a/java/src/main/java/com/google/protobuf/ByteString.java +++ b/java/src/main/java/com/google/protobuf/ByteString.java @@ -35,6 +35,7 @@ import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.FilterOutputStream; import java.io.UnsupportedEncodingException; +import java.nio.ByteBuffer; /** * Immutable array of bytes. @@ -154,6 +155,15 @@ public final class ByteString { } /** + * Constructs a new read-only {@code java.nio.ByteBuffer} with the + * same backing byte array. 
+ */ + public ByteBuffer asReadOnlyByteBuffer() { + ByteBuffer byteBuffer = ByteBuffer.wrap(this.bytes); + return byteBuffer.asReadOnlyBuffer(); + } + + /** * Constructs a new {@code String} by decoding the bytes using the * specified charset. */ diff --git a/java/src/main/java/com/google/protobuf/CodedInputStream.java b/java/src/main/java/com/google/protobuf/CodedInputStream.java index c956ed2b..caef068b 100644 --- a/java/src/main/java/com/google/protobuf/CodedInputStream.java +++ b/java/src/main/java/com/google/protobuf/CodedInputStream.java @@ -59,7 +59,14 @@ public final class CodedInputStream { * Create a new CodedInputStream wrapping the given byte array. */ public static CodedInputStream newInstance(byte[] buf) { - return new CodedInputStream(buf); + return newInstance(buf, 0, buf.length); + } + + /** + * Create a new CodedInputStream wrapping the given byte array slice. + */ + public static CodedInputStream newInstance(byte[] buf, int off, int len) { + return new CodedInputStream(buf, off, len); } // ----------------------------------------------------------------- @@ -454,7 +461,7 @@ public final class CodedInputStream { private byte[] buffer; private int bufferSize; private int bufferSizeAfterLimit = 0; - private int bufferPos = 0; + private int bufferPos; private InputStream input; private int lastTag = 0; @@ -479,15 +486,17 @@ public final class CodedInputStream { private static final int DEFAULT_SIZE_LIMIT = 64 << 20; // 64MB private static final int BUFFER_SIZE = 4096; - private CodedInputStream(byte[] buffer) { + private CodedInputStream(byte[] buffer, int off, int len) { this.buffer = buffer; - this.bufferSize = buffer.length; + this.bufferSize = off + len; + this.bufferPos = off; this.input = null; } private CodedInputStream(InputStream input) { this.buffer = new byte[BUFFER_SIZE]; this.bufferSize = 0; + this.bufferPos = 0; this.input = input; } diff --git a/java/src/main/java/com/google/protobuf/GeneratedMessage.java 
b/java/src/main/java/com/google/protobuf/GeneratedMessage.java index bc231a15..b1be8b14 100644 --- a/java/src/main/java/com/google/protobuf/GeneratedMessage.java +++ b/java/src/main/java/com/google/protobuf/GeneratedMessage.java @@ -87,6 +87,33 @@ public abstract class GeneratedMessage extends AbstractMessage { } return result; } + + public boolean isInitialized() { + for (FieldDescriptor field : getDescriptorForType().getFields()) { + // Check that all required fields are present. + if (field.isRequired()) { + if (!hasField(field)) { + return false; + } + } + // Check that embedded messages are initialized. + if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { + if (field.isRepeated()) { + for (Message element : (List<Message>) getField(field)) { + if (!element.isInitialized()) { + return false; + } + } + } else { + if (hasField(field) && !((Message) getField(field)).isInitialized()) { + return false; + } + } + } + } + + return true; + } public Map<FieldDescriptor, Object> getAllFields() { return Collections.unmodifiableMap(getAllFieldsMutable()); @@ -370,6 +397,10 @@ public abstract class GeneratedMessage extends AbstractMessage { protected boolean extensionsAreInitialized() { return extensions.isInitialized(); } + + public boolean isInitialized() { + return super.isInitialized() && extensionsAreInitialized(); + } /** * Used by subclasses to serialize extensions. Extension ranges may be diff --git a/java/src/main/java/com/google/protobuf/Message.java b/java/src/main/java/com/google/protobuf/Message.java index add5dab6..9635387a 100644 --- a/java/src/main/java/com/google/protobuf/Message.java +++ b/java/src/main/java/com/google/protobuf/Message.java @@ -400,6 +400,13 @@ public interface Message { /** * Parse {@code data} as a message of this type and merge it with the * message being built. This is just a small wrapper around + * {@link #mergeFrom(CodedInputStream)}. 
+ */ + public Builder mergeFrom(byte[] data, int off, int len) throws InvalidProtocolBufferException; + + /** + * Parse {@code data} as a message of this type and merge it with the + * message being built. This is just a small wrapper around * {@link #mergeFrom(CodedInputStream,ExtensionRegistry)}. */ Builder mergeFrom(byte[] data, @@ -407,6 +414,15 @@ public interface Message { throws InvalidProtocolBufferException; /** + * Parse {@code data} as a message of this type and merge it with the + * message being built. This is just a small wrapper around + * {@link #mergeFrom(CodedInputStream,ExtensionRegistry)}. + */ + Builder mergeFrom(byte[] data, int off, int len, + ExtensionRegistry extensionRegistry) + throws InvalidProtocolBufferException; + + /** * Parse a message of this type from {@code input} and merge it with the * message being built. This is just a small wrapper around * {@link #mergeFrom(CodedInputStream)}. Note that this method always diff --git a/java/src/test/java/com/google/protobuf/CodedInputStreamTest.java b/java/src/test/java/com/google/protobuf/CodedInputStreamTest.java index eca02f74..eaaddf44 100644 --- a/java/src/test/java/com/google/protobuf/CodedInputStreamTest.java +++ b/java/src/test/java/com/google/protobuf/CodedInputStreamTest.java @@ -412,4 +412,14 @@ public class CodedInputStreamTest extends TestCase { String text = input.readString(); assertEquals(0xfffd, text.charAt(0)); } + + public void testReadFromSlice() throws Exception { + byte[] bytes = bytes(0, 1, 2, 3, 4, 5, 6, 7, 8, 9); + CodedInputStream in = CodedInputStream.newInstance(bytes, 3, 5); + for (int i = 3; i < 8; i++) { + assertEquals(i, in.readRawByte()); + } + // eof + assertEquals(0, in.readTag()); + } } diff --git a/java/src/test/java/com/google/protobuf/GeneratedMessageTest.java b/java/src/test/java/com/google/protobuf/GeneratedMessageTest.java index 958943da..95c269a2 100644 --- a/java/src/test/java/com/google/protobuf/GeneratedMessageTest.java +++ 
b/java/src/test/java/com/google/protobuf/GeneratedMessageTest.java @@ -30,8 +30,9 @@ package com.google.protobuf; -import protobuf_unittest.UnittestOptimizeFor.TestRequiredOptimizedForSize; +import protobuf_unittest.UnittestOptimizeFor.TestOptimizedForSize; import protobuf_unittest.UnittestOptimizeFor.TestOptionalOptimizedForSize; +import protobuf_unittest.UnittestOptimizeFor.TestRequiredOptimizedForSize; import protobuf_unittest.UnittestProto; import protobuf_unittest.UnittestProto.ForeignMessage; import protobuf_unittest.UnittestProto.ForeignEnum; @@ -260,8 +261,10 @@ public class GeneratedMessageTest extends TestCase { MultipleFilesTestProto.extensionWithOuter)); } - public void testOptionalFieldWithRequiredSubfieldsOptimizedForSize() throws Exception { - TestOptionalOptimizedForSize message = TestOptionalOptimizedForSize.getDefaultInstance(); + public void testOptionalFieldWithRequiredSubfieldsOptimizedForSize() + throws Exception { + TestOptionalOptimizedForSize message = + TestOptionalOptimizedForSize.getDefaultInstance(); assertTrue(message.isInitialized()); message = TestOptionalOptimizedForSize.newBuilder().setO( @@ -274,4 +277,19 @@ public class GeneratedMessageTest extends TestCase { ).buildPartial(); assertTrue(message.isInitialized()); } + + public void testUninitializedExtensionInOptimizedForSize() + throws Exception { + TestOptimizedForSize.Builder builder = TestOptimizedForSize.newBuilder(); + builder.setExtension(TestOptimizedForSize.testExtension2, + TestRequiredOptimizedForSize.newBuilder().buildPartial()); + assertFalse(builder.isInitialized()); + assertFalse(builder.buildPartial().isInitialized()); + + builder = TestOptimizedForSize.newBuilder(); + builder.setExtension(TestOptimizedForSize.testExtension2, + TestRequiredOptimizedForSize.newBuilder().setX(10).buildPartial()); + assertTrue(builder.isInitialized()); + assertTrue(builder.buildPartial().isInitialized()); + } } diff --git a/java/src/test/java/com/google/protobuf/WireFormatTest.java 
b/java/src/test/java/com/google/protobuf/WireFormatTest.java index d7295c20..3fb54fcf 100644 --- a/java/src/test/java/com/google/protobuf/WireFormatTest.java +++ b/java/src/test/java/com/google/protobuf/WireFormatTest.java @@ -135,6 +135,47 @@ public class WireFormatTest extends TestCase { assertFieldsInOrder(dynamic_data); } + private ExtensionRegistry getTestFieldOrderingsRegistry() { + ExtensionRegistry result = ExtensionRegistry.newInstance(); + result.add(UnittestProto.myExtensionInt); + result.add(UnittestProto.myExtensionString); + return result; + } + + public void testParseMultipleExtensionRanges() throws Exception { + // Make sure we can parse a message that contains multiple extensions + // ranges. + TestFieldOrderings source = + TestFieldOrderings.newBuilder() + .setMyInt(1) + .setMyString("foo") + .setMyFloat(1.0F) + .setExtension(UnittestProto.myExtensionInt, 23) + .setExtension(UnittestProto.myExtensionString, "bar") + .build(); + TestFieldOrderings dest = + TestFieldOrderings.parseFrom(source.toByteString(), + getTestFieldOrderingsRegistry()); + assertEquals(source, dest); + } + + public void testParseMultipleExtensionRangesDynamic() throws Exception { + // Same as above except with DynamicMessage. 
+ Descriptors.Descriptor descriptor = TestFieldOrderings.getDescriptor(); + DynamicMessage source = + DynamicMessage.newBuilder(TestFieldOrderings.getDescriptor()) + .setField(descriptor.findFieldByName("my_int"), 1L) + .setField(descriptor.findFieldByName("my_string"), "foo") + .setField(descriptor.findFieldByName("my_float"), 1.0F) + .setField(UnittestProto.myExtensionInt.getDescriptor(), 23) + .setField(UnittestProto.myExtensionString.getDescriptor(), "bar") + .build(); + DynamicMessage dest = + DynamicMessage.parseFrom(descriptor, source.toByteString(), + getTestFieldOrderingsRegistry()); + assertEquals(source, dest); + } + private static final int UNKNOWN_TYPE_ID = 1550055; private static final int TYPE_ID_1 = TestMessageSetExtension1.getDescriptor().getExtensions().get(0).getNumber(); diff --git a/python/google/protobuf/internal/containers.py b/python/google/protobuf/internal/containers.py new file mode 100755 index 00000000..982badc8 --- /dev/null +++ b/python/google/protobuf/internal/containers.py @@ -0,0 +1,179 @@ +# Protocol Buffers - Google's data interchange format +# Copyright 2008 Google Inc. All rights reserved. +# http://code.google.com/p/protobuf/ +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""Contains container classes to represent different protocol buffer types. + +This file defines container classes which represent categories of protocol +buffer field types which need extra maintenance. Currently these categories +are: + - Repeated scalar fields - These are all repeated fields which aren't + composite (e.g. they are of simple types like int32, string, etc). + - Repeated composite fields - Repeated fields which are composite. This + includes groups and nested messages. +""" + +__author__ = 'petar@google.com (Petar Petrov)' + + +class BaseContainer(object): + + """Base container class.""" + + # Minimizes memory usage and disallows assignment to other attributes. + __slots__ = ['_message_listener', '_values'] + + def __init__(self, message_listener): + """ + Args: + message_listener: A MessageListener implementation. + The RepeatedScalarFieldContainer will call this object's + TransitionToNonempty() method when it transitions from being empty to + being nonempty. 
+ """ + self._message_listener = message_listener + self._values = [] + + def __getitem__(self, key): + """Retrieves item by the specified key.""" + return self._values[key] + + def __len__(self): + """Returns the number of elements in the container.""" + return len(self._values) + + def __ne__(self, other): + """Checks if another instance isn't equal to this one.""" + # The concrete classes should define __eq__. + return not self == other + + +class RepeatedScalarFieldContainer(BaseContainer): + + """Simple, type-checked, list-like container for holding repeated scalars.""" + + # Disallows assignment to other attributes. + __slots__ = ['_type_checker'] + + def __init__(self, message_listener, type_checker): + """ + Args: + message_listener: A MessageListener implementation. + The RepeatedScalarFieldContainer will call this object's + TransitionToNonempty() method when it transitions from being empty to + being nonempty. + type_checker: A type_checkers.ValueChecker instance to run on elements + inserted into this container. + """ + super(RepeatedScalarFieldContainer, self).__init__(message_listener) + self._type_checker = type_checker + + def append(self, elem): + """Appends a scalar to the list. Similar to list.append().""" + self._type_checker.CheckValue(elem) + self._values.append(elem) + self._message_listener.ByteSizeDirty() + if len(self._values) == 1: + self._message_listener.TransitionToNonempty() + + def remove(self, elem): + """Removes a scalar from the list. Similar to list.remove().""" + self._values.remove(elem) + self._message_listener.ByteSizeDirty() + + def __setitem__(self, key, value): + """Sets the item on the specified position.""" + # No need to call TransitionToNonempty(), since if we're able to + # set the element at this index, we were already nonempty before + # this method was called. 
+ self._message_listener.ByteSizeDirty() + self._type_checker.CheckValue(value) + self._values[key] = value + + def __eq__(self, other): + """Compares the current instance with another one.""" + if self is other: + return True + # Special case for the same type which should be common and fast. + if isinstance(other, self.__class__): + return other._values == self._values + # We are presumably comparing against some other sequence type. + return other == self._values + + +class RepeatedCompositeFieldContainer(BaseContainer): + + """Simple, list-like container for holding repeated composite fields.""" + + # Disallows assignment to other attributes. + __slots__ = ['_message_descriptor'] + + def __init__(self, message_listener, message_descriptor): + """ + Note that we pass in a descriptor instead of the generated directly, + since at the time we construct a _RepeatedCompositeFieldContainer we + haven't yet necessarily initialized the type that will be contained in the + container. + + Args: + message_listener: A MessageListener implementation. + The RepeatedCompositeFieldContainer will call this object's + TransitionToNonempty() method when it transitions from being empty to + being nonempty. + message_descriptor: A Descriptor instance describing the protocol type + that should be present in this container. We'll use the + _concrete_class field of this descriptor when the client calls add(). 
+ """ + super(RepeatedCompositeFieldContainer, self).__init__(message_listener) + self._message_descriptor = message_descriptor + + def add(self): + """Adds a new element to the list and returns it.""" + new_element = self._message_descriptor._concrete_class() + new_element._SetListener(self._message_listener) + self._values.append(new_element) + self._message_listener.ByteSizeDirty() + self._message_listener.TransitionToNonempty() + return new_element + + def __delitem__(self, key): + """Deletes the element on the specified position.""" + self._message_listener.ByteSizeDirty() + del self._values[key] + + def __eq__(self, other): + """Compares the current instance with another one.""" + if self is other: + return True + if not isinstance(other, self.__class__): + raise TypeError('Can only compare repeated composite fields against ' + 'other repeated composite fields.') + return self._values == other._values + + # TODO(robinson): Implement, document, and test slicing support. diff --git a/python/google/protobuf/internal/decoder_test.py b/python/google/protobuf/internal/decoder_test.py index e020f8dc..abcc07fc 100755 --- a/python/google/protobuf/internal/decoder_test.py +++ b/python/google/protobuf/internal/decoder_test.py @@ -1,3 +1,5 @@ +#! /usr/bin/python +# # Protocol Buffers - Google's data interchange format # Copyright 2008 Google Inc. All rights reserved. # http://code.google.com/p/protobuf/ diff --git a/python/google/protobuf/internal/descriptor_test.py b/python/google/protobuf/internal/descriptor_test.py index eecf09ba..eb9f2be8 100755 --- a/python/google/protobuf/internal/descriptor_test.py +++ b/python/google/protobuf/internal/descriptor_test.py @@ -1,3 +1,5 @@ +#! /usr/bin/python +# # Protocol Buffers - Google's data interchange format # Copyright 2008 Google Inc. All rights reserved. 
# http://code.google.com/p/protobuf/ diff --git a/python/google/protobuf/internal/encoder_test.py b/python/google/protobuf/internal/encoder_test.py index 4a0c3d23..61668223 100755 --- a/python/google/protobuf/internal/encoder_test.py +++ b/python/google/protobuf/internal/encoder_test.py @@ -1,3 +1,5 @@ +#! /usr/bin/python +# # Protocol Buffers - Google's data interchange format # Copyright 2008 Google Inc. All rights reserved. # http://code.google.com/p/protobuf/ diff --git a/python/google/protobuf/internal/generator_test.py b/python/google/protobuf/internal/generator_test.py index ce22f2bc..11fcfa0c 100755 --- a/python/google/protobuf/internal/generator_test.py +++ b/python/google/protobuf/internal/generator_test.py @@ -1,3 +1,5 @@ +#! /usr/bin/python +# # Protocol Buffers - Google's data interchange format # Copyright 2008 Google Inc. All rights reserved. # http://code.google.com/p/protobuf/ diff --git a/python/google/protobuf/internal/input_stream_test.py b/python/google/protobuf/internal/input_stream_test.py index d96a14c6..8cc1d126 100755 --- a/python/google/protobuf/internal/input_stream_test.py +++ b/python/google/protobuf/internal/input_stream_test.py @@ -1,3 +1,5 @@ +#! /usr/bin/python +# # Protocol Buffers - Google's data interchange format # Copyright 2008 Google Inc. All rights reserved. # http://code.google.com/p/protobuf/ diff --git a/python/google/protobuf/internal/output_stream_test.py b/python/google/protobuf/internal/output_stream_test.py index 36da5327..df92eecd 100755 --- a/python/google/protobuf/internal/output_stream_test.py +++ b/python/google/protobuf/internal/output_stream_test.py @@ -1,3 +1,5 @@ +#! /usr/bin/python +# # Protocol Buffers - Google's data interchange format # Copyright 2008 Google Inc. All rights reserved. 
# http://code.google.com/p/protobuf/ diff --git a/python/google/protobuf/internal/reflection_test.py b/python/google/protobuf/internal/reflection_test.py index c8e60005..c2ca5132 100755 --- a/python/google/protobuf/internal/reflection_test.py +++ b/python/google/protobuf/internal/reflection_test.py @@ -1,5 +1,6 @@ +#! /usr/bin/python # -*- coding: utf-8 -*- - +# # Protocol Buffers - Google's data interchange format # Copyright 2008 Google Inc. All rights reserved. # http://code.google.com/p/protobuf/ diff --git a/python/google/protobuf/internal/service_reflection_test.py b/python/google/protobuf/internal/service_reflection_test.py index 00a36af6..29492e16 100755 --- a/python/google/protobuf/internal/service_reflection_test.py +++ b/python/google/protobuf/internal/service_reflection_test.py @@ -1,3 +1,5 @@ +#! /usr/bin/python +# # Protocol Buffers - Google's data interchange format # Copyright 2008 Google Inc. All rights reserved. # http://code.google.com/p/protobuf/ diff --git a/python/google/protobuf/internal/text_format_test.py b/python/google/protobuf/internal/text_format_test.py index c48760e2..871590e7 100755 --- a/python/google/protobuf/internal/text_format_test.py +++ b/python/google/protobuf/internal/text_format_test.py @@ -1,3 +1,5 @@ +#! /usr/bin/python +# # Protocol Buffers - Google's data interchange format # Copyright 2008 Google Inc. All rights reserved. # http://code.google.com/p/protobuf/ diff --git a/python/google/protobuf/internal/wire_format_test.py b/python/google/protobuf/internal/wire_format_test.py index 4f6078f2..76007786 100755 --- a/python/google/protobuf/internal/wire_format_test.py +++ b/python/google/protobuf/internal/wire_format_test.py @@ -1,3 +1,5 @@ +#! /usr/bin/python +# # Protocol Buffers - Google's data interchange format # Copyright 2008 Google Inc. All rights reserved. 
# http://code.google.com/p/protobuf/ diff --git a/python/google/protobuf/message.py b/python/google/protobuf/message.py index 83779b15..4da024ca 100755 --- a/python/google/protobuf/message.py +++ b/python/google/protobuf/message.py @@ -198,7 +198,7 @@ class Message(object): # Typically (in python), an underscore is appended to names that are # keywords. So they would become lambda_ or yield_. # """ - def ListFields(self, field_name): + def ListFields(self): """Returns a list of (FieldDescriptor, value) tuples for all fields in the message which are not empty. A singular field is non-empty if HasField() would return true, and a repeated field is non-empty if diff --git a/python/google/protobuf/reflection.py b/python/google/protobuf/reflection.py index d80942e8..c35742ce 100755 --- a/python/google/protobuf/reflection.py +++ b/python/google/protobuf/reflection.py @@ -54,6 +54,7 @@ import heapq import threading import weakref # We use "as" to avoid name collisions with variables. +from google.protobuf.internal import containers from google.protobuf.internal import decoder from google.protobuf.internal import encoder from google.protobuf.internal import message_listener as message_listener_mod @@ -274,9 +275,10 @@ def _DefaultValueForField(message, field): if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: # We can't look at _concrete_class yet since it might not have # been set. (Depends on order in which we initialize the classes). - return _RepeatedCompositeFieldContainer(listener, field.message_type) + return containers.RepeatedCompositeFieldContainer( + listener, field.message_type) else: - return _RepeatedScalarFieldContainer( + return containers.RepeatedScalarFieldContainer( listener, type_checkers.GetTypeChecker(field.cpp_type, field.type)) if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE: @@ -1270,135 +1272,6 @@ class _Listener(object): pass -# TODO(robinson): Move elsewhere? -# TODO(robinson): Provide a clear() method here in addition to ClearField()? 
-class _RepeatedScalarFieldContainer(object): - - """Simple, type-checked, list-like container for holding repeated scalars.""" - - # Minimizes memory usage and disallows assignment to other attributes. - __slots__ = ['_message_listener', '_type_checker', '_values'] - - def __init__(self, message_listener, type_checker): - """ - Args: - message_listener: A MessageListener implementation. - The _RepeatedScalarFieldContaininer will call this object's - TransitionToNonempty() method when it transitions from being empty to - being nonempty. - type_checker: A _ValueChecker instance to run on elements inserted - into this container. - """ - self._message_listener = message_listener - self._type_checker = type_checker - self._values = [] - - def append(self, elem): - self._type_checker.CheckValue(elem) - self._values.append(elem) - self._message_listener.ByteSizeDirty() - if len(self._values) == 1: - self._message_listener.TransitionToNonempty() - - def remove(self, elem): - self._values.remove(elem) - self._message_listener.ByteSizeDirty() - - # List-like __getitem__() support also makes us iterable (via "iter(foo)" - # or implicitly via "for i in mylist:") for free. - def __getitem__(self, key): - return self._values[key] - - def __setitem__(self, key, value): - # No need to call TransitionToNonempty(), since if we're able to - # set the element at this index, we were already nonempty before - # this method was called. - self._message_listener.ByteSizeDirty() - self._type_checker.CheckValue(value) - self._values[key] = value - - def __len__(self): - return len(self._values) - - def __eq__(self, other): - if self is other: - return True - # Special case for the same type which should be common and fast. - if isinstance(other, self.__class__): - return other._values == self._values - # We are presumably comparing against some other sequence type. - return other == self._values - - def __ne__(self, other): - # Can't use != here since it would infinitely recurse. 
- return not self == other - - -# TODO(robinson): Move elsewhere? -# TODO(robinson): Provide a clear() method here in addition to ClearField()? -# TODO(robinson): Unify common functionality with -# _RepeatedScalarFieldContaininer? -class _RepeatedCompositeFieldContainer(object): - - """Simple, list-like container for holding repeated composite fields.""" - - # Minimizes memory usage and disallows assignment to other attributes. - __slots__ = ['_values', '_message_descriptor', '_message_listener'] - - def __init__(self, message_listener, message_descriptor): - """Note that we pass in a descriptor instead of the generated directly, - since at the time we construct a _RepeatedCompositeFieldContainer we - haven't yet necessarily initialized the type that will be contained in the - container. - - Args: - message_listener: A MessageListener implementation. - The _RepeatedCompositeFieldContainer will call this object's - TransitionToNonempty() method when it transitions from being empty to - being nonempty. - message_descriptor: A Descriptor instance describing the protocol type - that should be present in this container. We'll use the - _concrete_class field of this descriptor when the client calls add(). - """ - self._message_listener = message_listener - self._message_descriptor = message_descriptor - self._values = [] - - def add(self): - new_element = self._message_descriptor._concrete_class() - new_element._SetListener(self._message_listener) - self._values.append(new_element) - self._message_listener.ByteSizeDirty() - self._message_listener.TransitionToNonempty() - return new_element - - def __delitem__(self, key): - self._message_listener.ByteSizeDirty() - del self._values[key] - - # List-like __getitem__() support also makes us iterable (via "iter(foo)" - # or implicitly via "for i in mylist:") for free. 
- def __getitem__(self, key): - return self._values[key] - - def __len__(self): - return len(self._values) - - def __eq__(self, other): - if self is other: - return True - if not isinstance(other, self.__class__): - raise TypeError('Can only compare repeated composite fields against ' - 'other repeated composite fields.') - return self._values == other._values - - def __ne__(self, other): - # Can't use != here since it would infinitely recurse. - return not self == other - - # TODO(robinson): Implement, document, and test slicing support. - - # TODO(robinson): Move elsewhere? This file is getting pretty ridiculous... # TODO(robinson): Unify error handling of "unknown extension" crap. # TODO(robinson): There's so much similarity between the way that diff --git a/python/setup.py b/python/setup.py index f0ee329d..cda2c0eb 100755 --- a/python/setup.py +++ b/python/setup.py @@ -108,6 +108,7 @@ if __name__ == '__main__': test_suite = 'setup.MakeTestSuite', # Must list modules explicitly so that we don't install tests. 
py_modules = [ + 'google.protobuf.internal.containers', 'google.protobuf.internal.decoder', 'google.protobuf.internal.encoder', 'google.protobuf.internal.input_stream', diff --git a/src/Makefile.am b/src/Makefile.am index 41d881fd..9e97e62c 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -68,6 +68,7 @@ libprotobuf_la_SOURCES = \ google/protobuf/stubs/substitute.h \ google/protobuf/stubs/strutil.cc \ google/protobuf/stubs/strutil.h \ + google/protobuf/stubs/structurally_valid.cc \ google/protobuf/descriptor.cc \ google/protobuf/descriptor.pb.cc \ google/protobuf/descriptor_database.cc \ @@ -209,6 +210,7 @@ protobuf_test_LDADD = $(PTHREAD_LIBS) libprotobuf.la libprotoc.la protobuf_test_SOURCES = \ google/protobuf/stubs/common_unittest.cc \ google/protobuf/stubs/strutil_unittest.cc \ + google/protobuf/stubs/structurally_valid_unittest.cc \ google/protobuf/descriptor_database_unittest.cc \ google/protobuf/descriptor_unittest.cc \ google/protobuf/dynamic_message_unittest.cc \ diff --git a/src/google/protobuf/compiler/command_line_interface.cc b/src/google/protobuf/compiler/command_line_interface.cc index a19d16d1..ea8c4ab2 100644 --- a/src/google/protobuf/compiler/command_line_interface.cc +++ b/src/google/protobuf/compiler/command_line_interface.cc @@ -32,6 +32,7 @@ // Based on original Protocol Buffers design by // Sanjay Ghemawat, Jeff Dean, and others. 
+#include <stdio.h> #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> diff --git a/src/google/protobuf/compiler/cpp/cpp_enum_field.cc b/src/google/protobuf/compiler/cpp/cpp_enum_field.cc index 6f0bc270..c998f20b 100644 --- a/src/google/protobuf/compiler/cpp/cpp_enum_field.cc +++ b/src/google/protobuf/compiler/cpp/cpp_enum_field.cc @@ -111,6 +111,11 @@ GenerateMergingCode(io::Printer* printer) const { } void EnumFieldGenerator:: +GenerateSwappingCode(io::Printer* printer) const { + printer->Print(variables_, "std::swap($name$_, other->$name$_);\n"); +} + +void EnumFieldGenerator:: GenerateInitializer(io::Printer* printer) const { printer->Print(variables_, ",\n$name$_($default$)"); } @@ -201,6 +206,11 @@ GenerateMergingCode(io::Printer* printer) const { } void RepeatedEnumFieldGenerator:: +GenerateSwappingCode(io::Printer* printer) const { + printer->Print(variables_, "$name$_.Swap(&other->$name$_);\n"); +} + +void RepeatedEnumFieldGenerator:: GenerateInitializer(io::Printer* printer) const { // Not needed for repeated fields. 
} diff --git a/src/google/protobuf/compiler/cpp/cpp_enum_field.h b/src/google/protobuf/compiler/cpp/cpp_enum_field.h index 6bcd0b1b..f67b7ac0 100644 --- a/src/google/protobuf/compiler/cpp/cpp_enum_field.h +++ b/src/google/protobuf/compiler/cpp/cpp_enum_field.h @@ -55,6 +55,7 @@ class EnumFieldGenerator : public FieldGenerator { void GenerateInlineAccessorDefinitions(io::Printer* printer) const; void GenerateClearingCode(io::Printer* printer) const; void GenerateMergingCode(io::Printer* printer) const; + void GenerateSwappingCode(io::Printer* printer) const; void GenerateInitializer(io::Printer* printer) const; void GenerateMergeFromCodedStream(io::Printer* printer) const; void GenerateSerializeWithCachedSizes(io::Printer* printer) const; @@ -78,6 +79,7 @@ class RepeatedEnumFieldGenerator : public FieldGenerator { void GenerateInlineAccessorDefinitions(io::Printer* printer) const; void GenerateClearingCode(io::Printer* printer) const; void GenerateMergingCode(io::Printer* printer) const; + void GenerateSwappingCode(io::Printer* printer) const; void GenerateInitializer(io::Printer* printer) const; void GenerateMergeFromCodedStream(io::Printer* printer) const; void GenerateSerializeWithCachedSizes(io::Printer* printer) const; diff --git a/src/google/protobuf/compiler/cpp/cpp_field.h b/src/google/protobuf/compiler/cpp/cpp_field.h index 925483a7..e5f8258f 100644 --- a/src/google/protobuf/compiler/cpp/cpp_field.h +++ b/src/google/protobuf/compiler/cpp/cpp_field.h @@ -87,6 +87,13 @@ class FieldGenerator { // GenerateMergeFrom method. virtual void GenerateMergingCode(io::Printer* printer) const = 0; + // Generate lines of code (statements, not declarations) which swaps + // this field and the corresponding field of another message, which + // is stored in the generated code variable "other". This is used to + // define the Swap method. Details of usage can be found in + // message.cc under the GenerateSwap method. 
+ virtual void GenerateSwappingCode(io::Printer* printer) const = 0; + // Generate any initializers needed for the private members declared by // GeneratePrivateMembers(). These go into the message class's // constructor's initializer list. For each initializer, this method diff --git a/src/google/protobuf/compiler/cpp/cpp_message.cc b/src/google/protobuf/compiler/cpp/cpp_message.cc index f3fd48d1..eacceeaf 100644 --- a/src/google/protobuf/compiler/cpp/cpp_message.cc +++ b/src/google/protobuf/compiler/cpp/cpp_message.cc @@ -416,7 +416,8 @@ GenerateClassDefinition(io::Printer* printer) { "}\n" "\n" "static const ::google::protobuf::Descriptor* descriptor();\n" - "static const $classname$& default_instance();" + "static const $classname$& default_instance();\n" + "void Swap($classname$* other);\n" "\n" "// implements Message ----------------------------------------------\n" "\n" @@ -617,7 +618,8 @@ GenerateDescriptorInitializer(io::Printer* printer, int index) { " -1,\n"); } printer->Print(vars, - " ::google::protobuf::DescriptorPool::generated_pool());\n"); + " ::google::protobuf::DescriptorPool::generated_pool(),\n" + " sizeof($classname$));\n"); // Handle nested types. for (int i = 0; i < descriptor_->nested_type_count(); i++) { @@ -693,6 +695,9 @@ GenerateClassMethods(io::Printer* printer) { GenerateCopyFrom(printer); printer->Print("\n"); + GenerateSwap(printer); + printer->Print("\n"); + GenerateIsInitialized(printer); printer->Print("\n"); } @@ -947,6 +952,37 @@ GenerateClear(io::Printer* printer) { } void MessageGenerator:: +GenerateSwap(io::Printer* printer) { + // Generate the Swap member function. 
+ printer->Print("void $classname$::Swap($classname$* other) {\n", + "classname", classname_); + printer->Indent(); + printer->Print("if (other != this) {\n"); + printer->Indent(); + + for (int i = 0; i < descriptor_->field_count(); i++) { + const FieldDescriptor* field = descriptor_->field(i); + field_generators_.get(field).GenerateSwappingCode(printer); + } + + for (int i = 0; i < (descriptor_->field_count() + 31) / 32; ++i) { + printer->Print("std::swap(_has_bits_[$i$], other->_has_bits_[$i$]);\n", + "i", SimpleItoa(i)); + } + + printer->Print("_unknown_fields_.Swap(&other->_unknown_fields_);\n"); + printer->Print("std::swap(_cached_size_, other->_cached_size_);\n"); + if (descriptor_->extension_range_count() > 0) { + printer->Print("_extensions_.Swap(&other->_extensions_);\n"); + } + + printer->Outdent(); + printer->Print("}\n"); + printer->Outdent(); + printer->Print("}\n"); +} + +void MessageGenerator:: GenerateMergeFrom(io::Printer* printer) { // Generate the generalized MergeFrom (aka that which takes in the Message // base class as a parameter). @@ -956,22 +992,20 @@ GenerateMergeFrom(io::Printer* printer) { "classname", classname_); printer->Indent(); - if (descriptor_->field_count() > 0) { - // Cast the message to the proper type. If we find that the message is - // *not* of the proper type, we can still call Merge via the reflection - // system, as the GOOGLE_CHECK above ensured that we have the same descriptor - // for each message. - printer->Print( - "const $classname$* source =\n" - " ::google::protobuf::internal::dynamic_cast_if_available<const $classname$*>(\n" - " &from);\n" - "if (source == NULL) {\n" - " ::google::protobuf::internal::ReflectionOps::Merge(from, this);\n" - "} else {\n" - " MergeFrom(*source);\n" - "}\n", - "classname", classname_); - } + // Cast the message to the proper type. 
If we find that the message is + // *not* of the proper type, we can still call Merge via the reflection + // system, as the GOOGLE_CHECK above ensured that we have the same descriptor + // for each message. + printer->Print( + "const $classname$* source =\n" + " ::google::protobuf::internal::dynamic_cast_if_available<const $classname$*>(\n" + " &from);\n" + "if (source == NULL) {\n" + " ::google::protobuf::internal::ReflectionOps::Merge(from, this);\n" + "} else {\n" + " MergeFrom(*source);\n" + "}\n", + "classname", classname_); printer->Outdent(); printer->Print("}\n\n"); @@ -1199,7 +1233,7 @@ GenerateMergeFromCodedStream(io::Printer* printer) { for (int i = 0; i < descriptor_->extension_range_count(); i++) { const Descriptor::ExtensionRange* range = descriptor_->extension_range(i); - if (i > 0) printer->Print(" &&\n "); + if (i > 0) printer->Print(" ||\n "); uint32 start_tag = WireFormat::MakeTag( range->start, static_cast<WireFormat::WireType>(0)); diff --git a/src/google/protobuf/compiler/cpp/cpp_message.h b/src/google/protobuf/compiler/cpp/cpp_message.h index 037ddd85..d6669a34 100644 --- a/src/google/protobuf/compiler/cpp/cpp_message.h +++ b/src/google/protobuf/compiler/cpp/cpp_message.h @@ -115,6 +115,7 @@ class MessageGenerator { void GenerateByteSize(io::Printer* printer); void GenerateMergeFrom(io::Printer* printer); void GenerateCopyFrom(io::Printer* printer); + void GenerateSwap(io::Printer* printer); void GenerateIsInitialized(io::Printer* printer); // Helpers for GenerateSerializeWithCachedSizes(). 
diff --git a/src/google/protobuf/compiler/cpp/cpp_message_field.cc b/src/google/protobuf/compiler/cpp/cpp_message_field.cc index 8904974f..d1c31067 100644 --- a/src/google/protobuf/compiler/cpp/cpp_message_field.cc +++ b/src/google/protobuf/compiler/cpp/cpp_message_field.cc @@ -111,6 +111,11 @@ GenerateMergingCode(io::Printer* printer) const { } void MessageFieldGenerator:: +GenerateSwappingCode(io::Printer* printer) const { + printer->Print(variables_, "std::swap($name$_, other->$name$_);\n"); +} + +void MessageFieldGenerator:: GenerateInitializer(io::Printer* printer) const { printer->Print(variables_, ",\n$name$_(NULL)"); } @@ -202,6 +207,11 @@ GenerateMergingCode(io::Printer* printer) const { } void RepeatedMessageFieldGenerator:: +GenerateSwappingCode(io::Printer* printer) const { + printer->Print(variables_, "$name$_.Swap(&other->$name$_);\n"); +} + +void RepeatedMessageFieldGenerator:: GenerateInitializer(io::Printer* printer) const { // Not needed for repeated fields. } diff --git a/src/google/protobuf/compiler/cpp/cpp_message_field.h b/src/google/protobuf/compiler/cpp/cpp_message_field.h index 0405cbc8..7ce4c32b 100644 --- a/src/google/protobuf/compiler/cpp/cpp_message_field.h +++ b/src/google/protobuf/compiler/cpp/cpp_message_field.h @@ -55,6 +55,7 @@ class MessageFieldGenerator : public FieldGenerator { void GenerateInlineAccessorDefinitions(io::Printer* printer) const; void GenerateClearingCode(io::Printer* printer) const; void GenerateMergingCode(io::Printer* printer) const; + void GenerateSwappingCode(io::Printer* printer) const; void GenerateInitializer(io::Printer* printer) const; void GenerateMergeFromCodedStream(io::Printer* printer) const; void GenerateSerializeWithCachedSizes(io::Printer* printer) const; @@ -78,6 +79,7 @@ class RepeatedMessageFieldGenerator : public FieldGenerator { void GenerateInlineAccessorDefinitions(io::Printer* printer) const; void GenerateClearingCode(io::Printer* printer) const; void GenerateMergingCode(io::Printer* 
printer) const; + void GenerateSwappingCode(io::Printer* printer) const; void GenerateInitializer(io::Printer* printer) const; void GenerateMergeFromCodedStream(io::Printer* printer) const; void GenerateSerializeWithCachedSizes(io::Printer* printer) const; diff --git a/src/google/protobuf/compiler/cpp/cpp_primitive_field.cc b/src/google/protobuf/compiler/cpp/cpp_primitive_field.cc index 0b437d68..ef4072f0 100644 --- a/src/google/protobuf/compiler/cpp/cpp_primitive_field.cc +++ b/src/google/protobuf/compiler/cpp/cpp_primitive_field.cc @@ -175,6 +175,11 @@ GenerateMergingCode(io::Printer* printer) const { } void PrimitiveFieldGenerator:: +GenerateSwappingCode(io::Printer* printer) const { + printer->Print(variables_, "std::swap($name$_, other->$name$_);\n"); +} + +void PrimitiveFieldGenerator:: GenerateInitializer(io::Printer* printer) const { printer->Print(variables_, ",\n$name$_($default$)"); } @@ -267,6 +272,11 @@ GenerateMergingCode(io::Printer* printer) const { } void RepeatedPrimitiveFieldGenerator:: +GenerateSwappingCode(io::Printer* printer) const { + printer->Print(variables_, "$name$_.Swap(&other->$name$_);\n"); +} + +void RepeatedPrimitiveFieldGenerator:: GenerateInitializer(io::Printer* printer) const { // Not needed for repeated fields. 
} diff --git a/src/google/protobuf/compiler/cpp/cpp_primitive_field.h b/src/google/protobuf/compiler/cpp/cpp_primitive_field.h index 25b7fba4..c7f7f310 100644 --- a/src/google/protobuf/compiler/cpp/cpp_primitive_field.h +++ b/src/google/protobuf/compiler/cpp/cpp_primitive_field.h @@ -55,6 +55,7 @@ class PrimitiveFieldGenerator : public FieldGenerator { void GenerateInlineAccessorDefinitions(io::Printer* printer) const; void GenerateClearingCode(io::Printer* printer) const; void GenerateMergingCode(io::Printer* printer) const; + void GenerateSwappingCode(io::Printer* printer) const; void GenerateInitializer(io::Printer* printer) const; void GenerateMergeFromCodedStream(io::Printer* printer) const; void GenerateSerializeWithCachedSizes(io::Printer* printer) const; @@ -78,6 +79,7 @@ class RepeatedPrimitiveFieldGenerator : public FieldGenerator { void GenerateInlineAccessorDefinitions(io::Printer* printer) const; void GenerateClearingCode(io::Printer* printer) const; void GenerateMergingCode(io::Printer* printer) const; + void GenerateSwappingCode(io::Printer* printer) const; void GenerateInitializer(io::Printer* printer) const; void GenerateMergeFromCodedStream(io::Printer* printer) const; void GenerateSerializeWithCachedSizes(io::Printer* printer) const; diff --git a/src/google/protobuf/compiler/cpp/cpp_string_field.cc b/src/google/protobuf/compiler/cpp/cpp_string_field.cc index 66a0496c..3e694ab7 100644 --- a/src/google/protobuf/compiler/cpp/cpp_string_field.cc +++ b/src/google/protobuf/compiler/cpp/cpp_string_field.cc @@ -163,12 +163,12 @@ GenerateInlineAccessorDefinitions(io::Printer* printer) const { "inline ::std::string* $classname$::mutable_$name$() {\n" " _set_bit($index$);\n" " if ($name$_ == &_default_$name$_) {\n"); - if (descriptor_->has_default_value()) { + if (descriptor_->default_value_string().empty()) { printer->Print(variables_, - " $name$_ = new ::std::string(_default_$name$_);\n"); + " $name$_ = new ::std::string;\n"); } else { 
printer->Print(variables_, - " $name$_ = new ::std::string;\n"); + " $name$_ = new ::std::string(_default_$name$_);\n"); } printer->Print(variables_, " }\n" @@ -178,26 +178,26 @@ GenerateInlineAccessorDefinitions(io::Printer* printer) const { void StringFieldGenerator:: GenerateNonInlineAccessorDefinitions(io::Printer* printer) const { - if (descriptor_->has_default_value()) { + if (descriptor_->default_value_string().empty()) { printer->Print(variables_, - "const ::std::string $classname$::_default_$name$_($default$);"); + "const ::std::string $classname$::_default_$name$_;"); } else { printer->Print(variables_, - "const ::std::string $classname$::_default_$name$_;"); + "const ::std::string $classname$::_default_$name$_($default$);"); } } void StringFieldGenerator:: GenerateClearingCode(io::Printer* printer) const { - if (descriptor_->has_default_value()) { + if (descriptor_->default_value_string().empty()) { printer->Print(variables_, "if ($name$_ != &_default_$name$_) {\n" - " $name$_->assign(_default_$name$_);\n" + " $name$_->clear();\n" "}\n"); } else { printer->Print(variables_, "if ($name$_ != &_default_$name$_) {\n" - " $name$_->clear();\n" + " $name$_->assign(_default_$name$_);\n" "}\n"); } } @@ -208,6 +208,11 @@ GenerateMergingCode(io::Printer* printer) const { } void StringFieldGenerator:: +GenerateSwappingCode(io::Printer* printer) const { + printer->Print(variables_, "std::swap($name$_, other->$name$_);\n"); +} + +void StringFieldGenerator:: GenerateInitializer(io::Printer* printer) const { printer->Print(variables_, ",\n$name$_(const_cast< ::std::string*>(&_default_$name$_))"); @@ -350,6 +355,11 @@ GenerateMergingCode(io::Printer* printer) const { } void RepeatedStringFieldGenerator:: +GenerateSwappingCode(io::Printer* printer) const { + printer->Print(variables_, "$name$_.Swap(&other->$name$_);\n"); +} + +void RepeatedStringFieldGenerator:: GenerateInitializer(io::Printer* printer) const { // Not needed for repeated fields. 
} diff --git a/src/google/protobuf/compiler/cpp/cpp_string_field.h b/src/google/protobuf/compiler/cpp/cpp_string_field.h index a0e5dce9..2244bd77 100644 --- a/src/google/protobuf/compiler/cpp/cpp_string_field.h +++ b/src/google/protobuf/compiler/cpp/cpp_string_field.h @@ -56,6 +56,7 @@ class StringFieldGenerator : public FieldGenerator { void GenerateNonInlineAccessorDefinitions(io::Printer* printer) const; void GenerateClearingCode(io::Printer* printer) const; void GenerateMergingCode(io::Printer* printer) const; + void GenerateSwappingCode(io::Printer* printer) const; void GenerateInitializer(io::Printer* printer) const; void GenerateDestructorCode(io::Printer* printer) const; void GenerateMergeFromCodedStream(io::Printer* printer) const; @@ -80,6 +81,7 @@ class RepeatedStringFieldGenerator : public FieldGenerator { void GenerateInlineAccessorDefinitions(io::Printer* printer) const; void GenerateClearingCode(io::Printer* printer) const; void GenerateMergingCode(io::Printer* printer) const; + void GenerateSwappingCode(io::Printer* printer) const; void GenerateInitializer(io::Printer* printer) const; void GenerateMergeFromCodedStream(io::Printer* printer) const; void GenerateSerializeWithCachedSizes(io::Printer* printer) const; diff --git a/src/google/protobuf/compiler/cpp/cpp_unittest.cc b/src/google/protobuf/compiler/cpp/cpp_unittest.cc index ce7d0c88..393c923b 100644 --- a/src/google/protobuf/compiler/cpp/cpp_unittest.cc +++ b/src/google/protobuf/compiler/cpp/cpp_unittest.cc @@ -236,6 +236,83 @@ TEST(GeneratedMessageTest, CopyFrom) { TestUtil::ExpectAllFieldsSet(message2); } +TEST(GeneratedMessageTest, SwapWithEmpty) { + unittest::TestAllTypes message1, message2; + TestUtil::SetAllFields(&message1); + + TestUtil::ExpectAllFieldsSet(message1); + TestUtil::ExpectClear(message2); + message1.Swap(&message2); + TestUtil::ExpectAllFieldsSet(message2); + TestUtil::ExpectClear(message1); +} + +TEST(GeneratedMessageTest, SwapWithSelf) { + unittest::TestAllTypes message; 
+ TestUtil::SetAllFields(&message); + TestUtil::ExpectAllFieldsSet(message); + message.Swap(&message); + TestUtil::ExpectAllFieldsSet(message); +} + +TEST(GeneratedMessageTest, SwapWithOther) { + unittest::TestAllTypes message1, message2; + + message1.set_optional_int32(123); + message1.set_optional_string("abc"); + message1.mutable_optional_nested_message()->set_bb(1); + message1.set_optional_nested_enum(unittest::TestAllTypes::FOO); + message1.add_repeated_int32(1); + message1.add_repeated_int32(2); + message1.add_repeated_string("a"); + message1.add_repeated_string("b"); + message1.add_repeated_nested_message()->set_bb(7); + message1.add_repeated_nested_message()->set_bb(8); + message1.add_repeated_nested_enum(unittest::TestAllTypes::FOO); + message1.add_repeated_nested_enum(unittest::TestAllTypes::BAR); + + message2.set_optional_int32(456); + message2.set_optional_string("def"); + message2.mutable_optional_nested_message()->set_bb(2); + message2.set_optional_nested_enum(unittest::TestAllTypes::BAR); + message2.add_repeated_int32(3); + message2.add_repeated_string("c"); + message2.add_repeated_nested_message()->set_bb(9); + message2.add_repeated_nested_enum(unittest::TestAllTypes::BAZ); + + message1.Swap(&message2); + + EXPECT_EQ(456, message1.optional_int32()); + EXPECT_EQ("def", message1.optional_string()); + EXPECT_EQ(2, message1.optional_nested_message().bb()); + EXPECT_EQ(unittest::TestAllTypes::BAR, message1.optional_nested_enum()); + ASSERT_EQ(1, message1.repeated_int32_size()); + EXPECT_EQ(3, message1.repeated_int32(0)); + ASSERT_EQ(1, message1.repeated_string_size()); + EXPECT_EQ("c", message1.repeated_string(0)); + ASSERT_EQ(1, message1.repeated_nested_message_size()); + EXPECT_EQ(9, message1.repeated_nested_message(0).bb()); + ASSERT_EQ(1, message1.repeated_nested_enum_size()); + EXPECT_EQ(unittest::TestAllTypes::BAZ, message1.repeated_nested_enum(0)); + + EXPECT_EQ(123, message2.optional_int32()); + EXPECT_EQ("abc", message2.optional_string()); + 
EXPECT_EQ(1, message2.optional_nested_message().bb()); + EXPECT_EQ(unittest::TestAllTypes::FOO, message2.optional_nested_enum()); + ASSERT_EQ(2, message2.repeated_int32_size()); + EXPECT_EQ(1, message2.repeated_int32(0)); + EXPECT_EQ(2, message2.repeated_int32(1)); + ASSERT_EQ(2, message2.repeated_string_size()); + EXPECT_EQ("a", message2.repeated_string(0)); + EXPECT_EQ("b", message2.repeated_string(1)); + ASSERT_EQ(2, message2.repeated_nested_message_size()); + EXPECT_EQ(7, message2.repeated_nested_message(0).bb()); + EXPECT_EQ(8, message2.repeated_nested_message(1).bb()); + ASSERT_EQ(2, message2.repeated_nested_enum_size()); + EXPECT_EQ(unittest::TestAllTypes::FOO, message2.repeated_nested_enum(0)); + EXPECT_EQ(unittest::TestAllTypes::BAR, message2.repeated_nested_enum(1)); +} + TEST(GeneratedMessageTest, CopyConstructor) { unittest::TestAllTypes message1; TestUtil::SetAllFields(&message1); @@ -492,6 +569,45 @@ TEST(GeneratedMessageTest, TestEmbedOptimizedForSize) { EXPECT_EQ(2, message2.repeated_message(0).msg().c()); } +TEST(GeneratedMessageTest, TestSpaceUsed) { + unittest::TestAllTypes message1; + // sizeof provides a lower bound on SpaceUsed(). + EXPECT_LE(sizeof(unittest::TestAllTypes), message1.SpaceUsed()); + const int empty_message_size = message1.SpaceUsed(); + + // Setting primitive types shouldn't affect the space used. + message1.set_optional_int32(123); + message1.set_optional_int64(12345); + message1.set_optional_uint32(123); + message1.set_optional_uint64(12345); + EXPECT_EQ(empty_message_size, message1.SpaceUsed()); + + // On some STL implementations, setting the string to a small value should + // only increase SpaceUsed() by the size of a string object, though this is + // not true everywhere. 
+ message1.set_optional_string("abc"); + EXPECT_LE(empty_message_size + sizeof(string), message1.SpaceUsed()); + + // Setting a string to a value larger than the string object itself should + // increase SpaceUsed(), because it cannot store the value internally. + message1.set_optional_string(string(sizeof(string) + 1, 'x')); + int min_expected_increase = message1.optional_string().capacity() + + sizeof(string); + EXPECT_LE(empty_message_size + min_expected_increase, + message1.SpaceUsed()); + + int previous_size = message1.SpaceUsed(); + // Adding an optional message should increase the size by the size of the + // nested message type. NestedMessage is simple enough (1 int field) that it + // is equal to sizeof(NestedMessage) + message1.mutable_optional_nested_message(); + ASSERT_EQ(sizeof(unittest::TestAllTypes::NestedMessage), + message1.optional_nested_message().SpaceUsed()); + EXPECT_EQ(previous_size + + sizeof(unittest::TestAllTypes::NestedMessage), + message1.SpaceUsed()); +} + // =================================================================== TEST(GeneratedEnumTest, EnumValuesAsSwitchCases) { diff --git a/src/google/protobuf/compiler/parser.cc b/src/google/protobuf/compiler/parser.cc index 1144aab7..e1c0d0d1 100644 --- a/src/google/protobuf/compiler/parser.cc +++ b/src/google/protobuf/compiler/parser.cc @@ -215,6 +215,11 @@ bool Parser::ConsumeString(string* output, const char* error) { if (LookingAtType(io::Tokenizer::TYPE_STRING)) { io::Tokenizer::ParseString(input_->current().text, output); input_->Next(); + // Allow C++ like concatenation of adjacent string tokens. + while (LookingAtType(io::Tokenizer::TYPE_STRING)) { + io::Tokenizer::ParseStringAppend(input_->current().text, output); + input_->Next(); + } return true; } else { AddError(error); @@ -864,13 +869,24 @@ bool Parser::ParseEnumConstant(EnumValueDescriptorProto* enum_value) { if (is_negative) number *= -1; enum_value->set_number(number); - // TODO(kenton): Options for enum values? 
+ DO(ParseEnumConstantOptions(enum_value)); DO(Consume(";")); return true; } +bool Parser::ParseEnumConstantOptions(EnumValueDescriptorProto* value) { + if (!TryConsume("[")) return true; + + do { + DO(ParseOptionAssignment(value->mutable_options())); + } while (TryConsume(",")); + + DO(Consume("]")); + return true; +} + // ------------------------------------------------------------------- // Services diff --git a/src/google/protobuf/compiler/parser.h b/src/google/protobuf/compiler/parser.h index e963c062..b670f740 100644 --- a/src/google/protobuf/compiler/parser.h +++ b/src/google/protobuf/compiler/parser.h @@ -236,6 +236,10 @@ class LIBPROTOBUF_EXPORT Parser { // Parse a single enum value within an enum block. bool ParseEnumConstant(EnumValueDescriptorProto* enum_value); + // Parse enum constant options, i.e. the list in square brackets at the end + // of the enum constant value definition. + bool ParseEnumConstantOptions(EnumValueDescriptorProto* value); + // Parse a single method within a service definition. 
bool ParseServiceMethod(MethodDescriptorProto* method); diff --git a/src/google/protobuf/compiler/parser_unittest.cc b/src/google/protobuf/compiler/parser_unittest.cc index 15cdd896..2d48c5ae 100644 --- a/src/google/protobuf/compiler/parser_unittest.cc +++ b/src/google/protobuf/compiler/parser_unittest.cc @@ -305,7 +305,9 @@ TEST_F(ParseMessageTest, FieldDefaults) { " required double foo = 1 [default=-11.5];\n" " required double foo = 1 [default= 12 ];\n" " required string foo = 1 [default='13\\001'];\n" + " required string foo = 1 [default='a' \"b\" \n \"c\"];\n" " required bytes foo = 1 [default='14\\002'];\n" + " required bytes foo = 1 [default='a' \"b\" \n 'c'];\n" " required bool foo = 1 [default=true ];\n" " required Foo foo = 1 [default=FOO ];\n" @@ -334,7 +336,9 @@ TEST_F(ParseMessageTest, FieldDefaults) { " field { type:TYPE_DOUBLE default_value:\"-11.5\" "ETC" }" " field { type:TYPE_DOUBLE default_value:\"12\" "ETC" }" " field { type:TYPE_STRING default_value:\"13\\001\" "ETC" }" + " field { type:TYPE_STRING default_value:\"abc\" "ETC" }" " field { type:TYPE_BYTES default_value:\"14\\\\002\" "ETC" }" + " field { type:TYPE_BYTES default_value:\"abc\" "ETC" }" " field { type:TYPE_BOOL default_value:\"true\" "ETC" }" " field { type_name:\"Foo\" default_value:\"FOO\" "ETC" }" @@ -534,6 +538,40 @@ TEST_F(ParseEnumTest, Values) { "}"); } +TEST_F(ParseEnumTest, ValueOptions) { + ExpectParsesTo( + "enum TestEnum {\n" + " FOO = 13;\n" + " BAR = -10 [ (something.text) = 'abc' ];\n" + " BAZ = 500 [ (something.text) = 'def', other = 1 ];\n" + "}\n", + + "enum_type {" + " name: \"TestEnum\"" + " value { name: \"FOO\" number: 13 }" + " value { name: \"BAR\" number: -10 " + " options { " + " uninterpreted_option { " + " name { name_part: \"something.text\" is_extension: true } " + " string_value: \"abc\" " + " } " + " } " + " } " + " value { name: \"BAZ\" number: 500 " + " options { " + " uninterpreted_option { " + " name { name_part: \"something.text\" is_extension: 
true } " + " string_value: \"def\" " + " } " + " uninterpreted_option { " + " name { name_part: \"other\" is_extension: false } " + " positive_int_value: 1 " + " } " + " } " + " } " + "}"); +} + // =================================================================== typedef ParserTest ParseServiceTest; diff --git a/src/google/protobuf/descriptor.cc b/src/google/protobuf/descriptor.cc index 882b104b..21c709fb 100644 --- a/src/google/protobuf/descriptor.cc +++ b/src/google/protobuf/descriptor.cc @@ -1656,6 +1656,10 @@ class DescriptorBuilder { // dependencies. Symbol FindSymbol(const string& name); + // Like FindSymbol() but does not require that the symbol is in one of the + // file's declared dependencies. + Symbol FindSymbolNotEnforcingDeps(const string& name); + // Like FindSymbol(), but looks up the name relative to some other symbol // name. This first searches siblings of relative_to, then siblings of its // parents, etc. For example, LookupSymbol("foo.bar", "baz.qux.corge") makes @@ -2016,7 +2020,7 @@ bool DescriptorBuilder::IsInPackage(const FileDescriptor* file, file->package()[package_name.size()] == '.'); } -Symbol DescriptorBuilder::FindSymbol(const string& name) { +Symbol DescriptorBuilder::FindSymbolNotEnforcingDeps(const string& name) { Symbol result; // We need to search our pool and all its underlays. @@ -2035,6 +2039,12 @@ Symbol DescriptorBuilder::FindSymbol(const string& name) { pool = pool->underlay_; } + return result; +} + +Symbol DescriptorBuilder::FindSymbol(const string& name) { + Symbol result = FindSymbolNotEnforcingDeps(name); + if (!pool_->enforce_dependencies_) { // Hack for CompilerUpgrader. return result; @@ -3315,7 +3325,8 @@ bool DescriptorBuilder::OptionInterpreter::InterpretSingleOption( // Note that we use DescriptorBuilder::FindSymbol(), not // DescriptorPool::FindMessageTypeByName() because we're already holding the // pool's mutex, and the latter method locks it again. 
- Symbol symbol = builder_->FindSymbol(options->GetDescriptor()->full_name()); + Symbol symbol = builder_->FindSymbolNotEnforcingDeps( + options->GetDescriptor()->full_name()); if (!symbol.IsNull() && symbol.type == Symbol::MESSAGE) { options_descriptor = symbol.descriptor; } else { @@ -3362,11 +3373,14 @@ bool DescriptorBuilder::OptionInterpreter::InterpretSingleOption( debug_msg_name += name_part; // Search for the field's descriptor as a regular field in the builder's // pool. First we must qualify it by its message name. Note that we use - // DescriptorBuilder::FindSymbol(), not DescriptorPool::FindFieldByName() - // because we're already holding the pool's mutex, and the latter method - // locks it again. + // DescriptorBuilder::FindSymbolNotEnforcingDeps(), not + // DescriptorPool::FindFieldByName() because we're already holding the + // pool's mutex, and the latter method locks it again. We must not + // enforce dependencies here because we did not enforce dependencies + // when looking up |descriptor|, and we need the two to match. string fully_qualified_name = descriptor->full_name() + "." + name_part; - Symbol symbol = builder_->FindSymbol(fully_qualified_name); + Symbol symbol = + builder_->FindSymbolNotEnforcingDeps(fully_qualified_name); if (!symbol.IsNull() && symbol.type == Symbol::FIELD) { field = symbol.field_descriptor; } else { @@ -3378,7 +3392,7 @@ bool DescriptorBuilder::OptionInterpreter::InterpretSingleOption( } } - if (!field) { + if (field == NULL) { return AddNameError("Option \"" + debug_msg_name + "\" unknown."); } else if (field->containing_type() != descriptor) { // This can only happen if, due to some insane misconfiguration of the @@ -3670,10 +3684,11 @@ bool DescriptorBuilder::OptionInterpreter::SetOptionValue( fully_qualified_name += value_name; // Search for the enum value's descriptor in the builder's pool. 
Note - // that we use DescriptorBuilder::LookupSymbol(), not + // that we use DescriptorBuilder::FindSymbolNotEnforcingDeps(), not // DescriptorPool::FindEnumValueByName() because we're already holding // the pool's mutex, and the latter method locks it again. - Symbol symbol = builder_->FindSymbol(fully_qualified_name); + Symbol symbol = + builder_->FindSymbolNotEnforcingDeps(fully_qualified_name); if (!symbol.IsNull() && symbol.type == Symbol::ENUM_VALUE) { if (symbol.enum_value_descriptor->type() != enum_type) { return AddValueError("Enum type \"" + enum_type->full_name() + diff --git a/src/google/protobuf/descriptor.pb.cc b/src/google/protobuf/descriptor.pb.cc index 4def00a2..ceb99bf1 100644 --- a/src/google/protobuf/descriptor.pb.cc +++ b/src/google/protobuf/descriptor.pb.cc @@ -87,7 +87,8 @@ void protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(FileDescriptorSet, _has_bits_[0]), GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(FileDescriptorSet, _unknown_fields_), -1, - ::google::protobuf::DescriptorPool::generated_pool()); + ::google::protobuf::DescriptorPool::generated_pool(), + sizeof(FileDescriptorSet)); ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage( FileDescriptorSet_descriptor_, FileDescriptorSet::default_instance_); FileDescriptorProto_descriptor_ = file->message_type(1); @@ -110,7 +111,8 @@ void protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(FileDescriptorProto, _has_bits_[0]), GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(FileDescriptorProto, _unknown_fields_), -1, - ::google::protobuf::DescriptorPool::generated_pool()); + ::google::protobuf::DescriptorPool::generated_pool(), + sizeof(FileDescriptorProto)); ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage( FileDescriptorProto_descriptor_, FileDescriptorProto::default_instance_); DescriptorProto_descriptor_ = 
file->message_type(2); @@ -132,7 +134,8 @@ void protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(DescriptorProto, _has_bits_[0]), GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(DescriptorProto, _unknown_fields_), -1, - ::google::protobuf::DescriptorPool::generated_pool()); + ::google::protobuf::DescriptorPool::generated_pool(), + sizeof(DescriptorProto)); DescriptorProto_ExtensionRange_descriptor_ = DescriptorProto_descriptor_->nested_type(0); DescriptorProto_ExtensionRange::default_instance_ = new DescriptorProto_ExtensionRange(); static const int DescriptorProto_ExtensionRange_offsets_[2] = { @@ -147,7 +150,8 @@ void protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(DescriptorProto_ExtensionRange, _has_bits_[0]), GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(DescriptorProto_ExtensionRange, _unknown_fields_), -1, - ::google::protobuf::DescriptorPool::generated_pool()); + ::google::protobuf::DescriptorPool::generated_pool(), + sizeof(DescriptorProto_ExtensionRange)); ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage( DescriptorProto_ExtensionRange_descriptor_, DescriptorProto_ExtensionRange::default_instance_); ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage( @@ -172,7 +176,8 @@ void protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(FieldDescriptorProto, _has_bits_[0]), GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(FieldDescriptorProto, _unknown_fields_), -1, - ::google::protobuf::DescriptorPool::generated_pool()); + ::google::protobuf::DescriptorPool::generated_pool(), + sizeof(FieldDescriptorProto)); FieldDescriptorProto_Type_descriptor_ = FieldDescriptorProto_descriptor_->enum_type(0); FieldDescriptorProto_Label_descriptor_ = FieldDescriptorProto_descriptor_->enum_type(1); 
::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage( @@ -192,7 +197,8 @@ void protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(EnumDescriptorProto, _has_bits_[0]), GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(EnumDescriptorProto, _unknown_fields_), -1, - ::google::protobuf::DescriptorPool::generated_pool()); + ::google::protobuf::DescriptorPool::generated_pool(), + sizeof(EnumDescriptorProto)); ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage( EnumDescriptorProto_descriptor_, EnumDescriptorProto::default_instance_); EnumValueDescriptorProto_descriptor_ = file->message_type(5); @@ -210,7 +216,8 @@ void protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(EnumValueDescriptorProto, _has_bits_[0]), GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(EnumValueDescriptorProto, _unknown_fields_), -1, - ::google::protobuf::DescriptorPool::generated_pool()); + ::google::protobuf::DescriptorPool::generated_pool(), + sizeof(EnumValueDescriptorProto)); ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage( EnumValueDescriptorProto_descriptor_, EnumValueDescriptorProto::default_instance_); ServiceDescriptorProto_descriptor_ = file->message_type(6); @@ -228,7 +235,8 @@ void protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(ServiceDescriptorProto, _has_bits_[0]), GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(ServiceDescriptorProto, _unknown_fields_), -1, - ::google::protobuf::DescriptorPool::generated_pool()); + ::google::protobuf::DescriptorPool::generated_pool(), + sizeof(ServiceDescriptorProto)); ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage( ServiceDescriptorProto_descriptor_, ServiceDescriptorProto::default_instance_); MethodDescriptorProto_descriptor_ = file->message_type(7); @@ -247,7 
+255,8 @@ void protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(MethodDescriptorProto, _has_bits_[0]), GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(MethodDescriptorProto, _unknown_fields_), -1, - ::google::protobuf::DescriptorPool::generated_pool()); + ::google::protobuf::DescriptorPool::generated_pool(), + sizeof(MethodDescriptorProto)); ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage( MethodDescriptorProto_descriptor_, MethodDescriptorProto::default_instance_); FileOptions_descriptor_ = file->message_type(8); @@ -267,7 +276,8 @@ void protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(FileOptions, _has_bits_[0]), GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(FileOptions, _unknown_fields_), GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(FileOptions, _extensions_), - ::google::protobuf::DescriptorPool::generated_pool()); + ::google::protobuf::DescriptorPool::generated_pool(), + sizeof(FileOptions)); FileOptions_OptimizeMode_descriptor_ = FileOptions_descriptor_->enum_type(0); ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage( FileOptions_descriptor_, FileOptions::default_instance_); @@ -285,7 +295,8 @@ void protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(MessageOptions, _has_bits_[0]), GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(MessageOptions, _unknown_fields_), GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(MessageOptions, _extensions_), - ::google::protobuf::DescriptorPool::generated_pool()); + ::google::protobuf::DescriptorPool::generated_pool(), + sizeof(MessageOptions)); ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage( MessageOptions_descriptor_, MessageOptions::default_instance_); FieldOptions_descriptor_ = file->message_type(10); @@ -303,7 +314,8 @@ void 
protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(FieldOptions, _has_bits_[0]), GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(FieldOptions, _unknown_fields_), GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(FieldOptions, _extensions_), - ::google::protobuf::DescriptorPool::generated_pool()); + ::google::protobuf::DescriptorPool::generated_pool(), + sizeof(FieldOptions)); FieldOptions_CType_descriptor_ = FieldOptions_descriptor_->enum_type(0); ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage( FieldOptions_descriptor_, FieldOptions::default_instance_); @@ -320,7 +332,8 @@ void protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(EnumOptions, _has_bits_[0]), GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(EnumOptions, _unknown_fields_), GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(EnumOptions, _extensions_), - ::google::protobuf::DescriptorPool::generated_pool()); + ::google::protobuf::DescriptorPool::generated_pool(), + sizeof(EnumOptions)); ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage( EnumOptions_descriptor_, EnumOptions::default_instance_); EnumValueOptions_descriptor_ = file->message_type(12); @@ -336,7 +349,8 @@ void protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(EnumValueOptions, _has_bits_[0]), GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(EnumValueOptions, _unknown_fields_), GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(EnumValueOptions, _extensions_), - ::google::protobuf::DescriptorPool::generated_pool()); + ::google::protobuf::DescriptorPool::generated_pool(), + sizeof(EnumValueOptions)); ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage( EnumValueOptions_descriptor_, EnumValueOptions::default_instance_); ServiceOptions_descriptor_ = file->message_type(13); @@ -352,7 +366,8 @@ 
void protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(ServiceOptions, _has_bits_[0]), GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(ServiceOptions, _unknown_fields_), GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(ServiceOptions, _extensions_), - ::google::protobuf::DescriptorPool::generated_pool()); + ::google::protobuf::DescriptorPool::generated_pool(), + sizeof(ServiceOptions)); ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage( ServiceOptions_descriptor_, ServiceOptions::default_instance_); MethodOptions_descriptor_ = file->message_type(14); @@ -368,7 +383,8 @@ void protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(MethodOptions, _has_bits_[0]), GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(MethodOptions, _unknown_fields_), GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(MethodOptions, _extensions_), - ::google::protobuf::DescriptorPool::generated_pool()); + ::google::protobuf::DescriptorPool::generated_pool(), + sizeof(MethodOptions)); ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage( MethodOptions_descriptor_, MethodOptions::default_instance_); UninterpretedOption_descriptor_ = file->message_type(15); @@ -389,7 +405,8 @@ void protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(UninterpretedOption, _has_bits_[0]), GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(UninterpretedOption, _unknown_fields_), -1, - ::google::protobuf::DescriptorPool::generated_pool()); + ::google::protobuf::DescriptorPool::generated_pool(), + sizeof(UninterpretedOption)); UninterpretedOption_NamePart_descriptor_ = UninterpretedOption_descriptor_->nested_type(0); UninterpretedOption_NamePart::default_instance_ = new UninterpretedOption_NamePart(); static const int UninterpretedOption_NamePart_offsets_[2] = { @@ -404,7 +421,8 @@ void 
protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(UninterpretedOption_NamePart, _has_bits_[0]), GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(UninterpretedOption_NamePart, _unknown_fields_), -1, - ::google::protobuf::DescriptorPool::generated_pool()); + ::google::protobuf::DescriptorPool::generated_pool(), + sizeof(UninterpretedOption_NamePart)); ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage( UninterpretedOption_NamePart_descriptor_, UninterpretedOption_NamePart::default_instance_); ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage( @@ -683,6 +701,15 @@ void FileDescriptorSet::CopyFrom(const FileDescriptorSet& from) { MergeFrom(from); } +void FileDescriptorSet::Swap(FileDescriptorSet* other) { + if (other != this) { + file_.Swap(&other->file_); + std::swap(_has_bits_[0], other->_has_bits_[0]); + _unknown_fields_.Swap(&other->_unknown_fields_); + std::swap(_cached_size_, other->_cached_size_); + } +} + bool FileDescriptorSet::IsInitialized() const { for (int i = 0; i < file_size(); i++) { @@ -1081,6 +1108,22 @@ void FileDescriptorProto::CopyFrom(const FileDescriptorProto& from) { MergeFrom(from); } +void FileDescriptorProto::Swap(FileDescriptorProto* other) { + if (other != this) { + std::swap(name_, other->name_); + std::swap(package_, other->package_); + dependency_.Swap(&other->dependency_); + message_type_.Swap(&other->message_type_); + enum_type_.Swap(&other->enum_type_); + service_.Swap(&other->service_); + extension_.Swap(&other->extension_); + std::swap(options_, other->options_); + std::swap(_has_bits_[0], other->_has_bits_[0]); + _unknown_fields_.Swap(&other->_unknown_fields_); + std::swap(_cached_size_, other->_cached_size_); + } +} + bool FileDescriptorProto::IsInitialized() const { for (int i = 0; i < message_type_size(); i++) { @@ -1298,6 +1341,16 @@ void DescriptorProto_ExtensionRange::CopyFrom(const DescriptorProto_ExtensionRan 
MergeFrom(from); } +void DescriptorProto_ExtensionRange::Swap(DescriptorProto_ExtensionRange* other) { + if (other != this) { + std::swap(start_, other->start_); + std::swap(end_, other->end_); + std::swap(_has_bits_[0], other->_has_bits_[0]); + _unknown_fields_.Swap(&other->_unknown_fields_); + std::swap(_cached_size_, other->_cached_size_); + } +} + bool DescriptorProto_ExtensionRange::IsInitialized() const { return true; @@ -1657,6 +1710,21 @@ void DescriptorProto::CopyFrom(const DescriptorProto& from) { MergeFrom(from); } +void DescriptorProto::Swap(DescriptorProto* other) { + if (other != this) { + std::swap(name_, other->name_); + field_.Swap(&other->field_); + extension_.Swap(&other->extension_); + nested_type_.Swap(&other->nested_type_); + enum_type_.Swap(&other->enum_type_); + extension_range_.Swap(&other->extension_range_); + std::swap(options_, other->options_); + std::swap(_has_bits_[0], other->_has_bits_[0]); + _unknown_fields_.Swap(&other->_unknown_fields_); + std::swap(_cached_size_, other->_cached_size_); + } +} + bool DescriptorProto::IsInitialized() const { for (int i = 0; i < field_size(); i++) { @@ -2171,6 +2239,22 @@ void FieldDescriptorProto::CopyFrom(const FieldDescriptorProto& from) { MergeFrom(from); } +void FieldDescriptorProto::Swap(FieldDescriptorProto* other) { + if (other != this) { + std::swap(name_, other->name_); + std::swap(number_, other->number_); + std::swap(label_, other->label_); + std::swap(type_, other->type_); + std::swap(type_name_, other->type_name_); + std::swap(extendee_, other->extendee_); + std::swap(default_value_, other->default_value_); + std::swap(options_, other->options_); + std::swap(_has_bits_[0], other->_has_bits_[0]); + _unknown_fields_.Swap(&other->_unknown_fields_); + std::swap(_cached_size_, other->_cached_size_); + } +} + bool FieldDescriptorProto::IsInitialized() const { if (has_options()) { @@ -2413,6 +2497,17 @@ void EnumDescriptorProto::CopyFrom(const EnumDescriptorProto& from) { MergeFrom(from); } 
+void EnumDescriptorProto::Swap(EnumDescriptorProto* other) { + if (other != this) { + std::swap(name_, other->name_); + value_.Swap(&other->value_); + std::swap(options_, other->options_); + std::swap(_has_bits_[0], other->_has_bits_[0]); + _unknown_fields_.Swap(&other->_unknown_fields_); + std::swap(_cached_size_, other->_cached_size_); + } +} + bool EnumDescriptorProto::IsInitialized() const { for (int i = 0; i < value_size(); i++) { @@ -2661,6 +2756,17 @@ void EnumValueDescriptorProto::CopyFrom(const EnumValueDescriptorProto& from) { MergeFrom(from); } +void EnumValueDescriptorProto::Swap(EnumValueDescriptorProto* other) { + if (other != this) { + std::swap(name_, other->name_); + std::swap(number_, other->number_); + std::swap(options_, other->options_); + std::swap(_has_bits_[0], other->_has_bits_[0]); + _unknown_fields_.Swap(&other->_unknown_fields_); + std::swap(_cached_size_, other->_cached_size_); + } +} + bool EnumValueDescriptorProto::IsInitialized() const { if (has_options()) { @@ -2903,6 +3009,17 @@ void ServiceDescriptorProto::CopyFrom(const ServiceDescriptorProto& from) { MergeFrom(from); } +void ServiceDescriptorProto::Swap(ServiceDescriptorProto* other) { + if (other != this) { + std::swap(name_, other->name_); + method_.Swap(&other->method_); + std::swap(options_, other->options_); + std::swap(_has_bits_[0], other->_has_bits_[0]); + _unknown_fields_.Swap(&other->_unknown_fields_); + std::swap(_cached_size_, other->_cached_size_); + } +} + bool ServiceDescriptorProto::IsInitialized() const { for (int i = 0; i < method_size(); i++) { @@ -3192,6 +3309,18 @@ void MethodDescriptorProto::CopyFrom(const MethodDescriptorProto& from) { MergeFrom(from); } +void MethodDescriptorProto::Swap(MethodDescriptorProto* other) { + if (other != this) { + std::swap(name_, other->name_); + std::swap(input_type_, other->input_type_); + std::swap(output_type_, other->output_type_); + std::swap(options_, other->options_); + std::swap(_has_bits_[0], other->_has_bits_[0]); 
+ _unknown_fields_.Swap(&other->_unknown_fields_); + std::swap(_cached_size_, other->_cached_size_); + } +} + bool MethodDescriptorProto::IsInitialized() const { if (has_options()) { @@ -3540,6 +3669,20 @@ void FileOptions::CopyFrom(const FileOptions& from) { MergeFrom(from); } +void FileOptions::Swap(FileOptions* other) { + if (other != this) { + std::swap(java_package_, other->java_package_); + std::swap(java_outer_classname_, other->java_outer_classname_); + std::swap(java_multiple_files_, other->java_multiple_files_); + std::swap(optimize_for_, other->optimize_for_); + uninterpreted_option_.Swap(&other->uninterpreted_option_); + std::swap(_has_bits_[0], other->_has_bits_[0]); + _unknown_fields_.Swap(&other->_unknown_fields_); + std::swap(_cached_size_, other->_cached_size_); + _extensions_.Swap(&other->_extensions_); + } +} + bool FileOptions::IsInitialized() const { for (int i = 0; i < uninterpreted_option_size(); i++) { @@ -3759,6 +3902,17 @@ void MessageOptions::CopyFrom(const MessageOptions& from) { MergeFrom(from); } +void MessageOptions::Swap(MessageOptions* other) { + if (other != this) { + std::swap(message_set_wire_format_, other->message_set_wire_format_); + uninterpreted_option_.Swap(&other->uninterpreted_option_); + std::swap(_has_bits_[0], other->_has_bits_[0]); + _unknown_fields_.Swap(&other->_unknown_fields_); + std::swap(_cached_size_, other->_cached_size_); + _extensions_.Swap(&other->_extensions_); + } +} + bool MessageOptions::IsInitialized() const { for (int i = 0; i < uninterpreted_option_size(); i++) { @@ -4040,6 +4194,18 @@ void FieldOptions::CopyFrom(const FieldOptions& from) { MergeFrom(from); } +void FieldOptions::Swap(FieldOptions* other) { + if (other != this) { + std::swap(ctype_, other->ctype_); + std::swap(experimental_map_key_, other->experimental_map_key_); + uninterpreted_option_.Swap(&other->uninterpreted_option_); + std::swap(_has_bits_[0], other->_has_bits_[0]); + _unknown_fields_.Swap(&other->_unknown_fields_); + 
std::swap(_cached_size_, other->_cached_size_); + _extensions_.Swap(&other->_extensions_); + } +} + bool FieldOptions::IsInitialized() const { for (int i = 0; i < uninterpreted_option_size(); i++) { @@ -4223,6 +4389,16 @@ void EnumOptions::CopyFrom(const EnumOptions& from) { MergeFrom(from); } +void EnumOptions::Swap(EnumOptions* other) { + if (other != this) { + uninterpreted_option_.Swap(&other->uninterpreted_option_); + std::swap(_has_bits_[0], other->_has_bits_[0]); + _unknown_fields_.Swap(&other->_unknown_fields_); + std::swap(_cached_size_, other->_cached_size_); + _extensions_.Swap(&other->_extensions_); + } +} + bool EnumOptions::IsInitialized() const { for (int i = 0; i < uninterpreted_option_size(); i++) { @@ -4406,6 +4582,16 @@ void EnumValueOptions::CopyFrom(const EnumValueOptions& from) { MergeFrom(from); } +void EnumValueOptions::Swap(EnumValueOptions* other) { + if (other != this) { + uninterpreted_option_.Swap(&other->uninterpreted_option_); + std::swap(_has_bits_[0], other->_has_bits_[0]); + _unknown_fields_.Swap(&other->_unknown_fields_); + std::swap(_cached_size_, other->_cached_size_); + _extensions_.Swap(&other->_extensions_); + } +} + bool EnumValueOptions::IsInitialized() const { for (int i = 0; i < uninterpreted_option_size(); i++) { @@ -4589,6 +4775,16 @@ void ServiceOptions::CopyFrom(const ServiceOptions& from) { MergeFrom(from); } +void ServiceOptions::Swap(ServiceOptions* other) { + if (other != this) { + uninterpreted_option_.Swap(&other->uninterpreted_option_); + std::swap(_has_bits_[0], other->_has_bits_[0]); + _unknown_fields_.Swap(&other->_unknown_fields_); + std::swap(_cached_size_, other->_cached_size_); + _extensions_.Swap(&other->_extensions_); + } +} + bool ServiceOptions::IsInitialized() const { for (int i = 0; i < uninterpreted_option_size(); i++) { @@ -4772,6 +4968,16 @@ void MethodOptions::CopyFrom(const MethodOptions& from) { MergeFrom(from); } +void MethodOptions::Swap(MethodOptions* other) { + if (other != this) { + 
uninterpreted_option_.Swap(&other->uninterpreted_option_); + std::swap(_has_bits_[0], other->_has_bits_[0]); + _unknown_fields_.Swap(&other->_unknown_fields_); + std::swap(_cached_size_, other->_cached_size_); + _extensions_.Swap(&other->_extensions_); + } +} + bool MethodOptions::IsInitialized() const { for (int i = 0; i < uninterpreted_option_size(); i++) { @@ -4980,6 +5186,16 @@ void UninterpretedOption_NamePart::CopyFrom(const UninterpretedOption_NamePart& MergeFrom(from); } +void UninterpretedOption_NamePart::Swap(UninterpretedOption_NamePart* other) { + if (other != this) { + std::swap(name_part_, other->name_part_); + std::swap(is_extension_, other->is_extension_); + std::swap(_has_bits_[0], other->_has_bits_[0]); + _unknown_fields_.Swap(&other->_unknown_fields_); + std::swap(_cached_size_, other->_cached_size_); + } +} + bool UninterpretedOption_NamePart::IsInitialized() const { if ((_has_bits_[0] & 0x00000003) != 0x00000003) return false; @@ -5319,6 +5535,20 @@ void UninterpretedOption::CopyFrom(const UninterpretedOption& from) { MergeFrom(from); } +void UninterpretedOption::Swap(UninterpretedOption* other) { + if (other != this) { + name_.Swap(&other->name_); + std::swap(identifier_value_, other->identifier_value_); + std::swap(positive_int_value_, other->positive_int_value_); + std::swap(negative_int_value_, other->negative_int_value_); + std::swap(double_value_, other->double_value_); + std::swap(string_value_, other->string_value_); + std::swap(_has_bits_[0], other->_has_bits_[0]); + _unknown_fields_.Swap(&other->_unknown_fields_); + std::swap(_cached_size_, other->_cached_size_); + } +} + bool UninterpretedOption::IsInitialized() const { for (int i = 0; i < name_size(); i++) { diff --git a/src/google/protobuf/descriptor.pb.h b/src/google/protobuf/descriptor.pb.h index 4e716014..693f0f9b 100644 --- a/src/google/protobuf/descriptor.pb.h +++ b/src/google/protobuf/descriptor.pb.h @@ -126,6 +126,8 @@ class LIBPROTOBUF_EXPORT FileDescriptorSet : public 
::google::protobuf::Message static const ::google::protobuf::Descriptor* descriptor(); static const FileDescriptorSet& default_instance(); + void Swap(FileDescriptorSet* other); + // implements Message ---------------------------------------------- FileDescriptorSet* New() const; @@ -209,6 +211,8 @@ class LIBPROTOBUF_EXPORT FileDescriptorProto : public ::google::protobuf::Messag static const ::google::protobuf::Descriptor* descriptor(); static const FileDescriptorProto& default_instance(); + void Swap(FileDescriptorProto* other); + // implements Message ---------------------------------------------- FileDescriptorProto* New() const; @@ -363,6 +367,8 @@ class LIBPROTOBUF_EXPORT DescriptorProto_ExtensionRange : public ::google::proto static const ::google::protobuf::Descriptor* descriptor(); static const DescriptorProto_ExtensionRange& default_instance(); + void Swap(DescriptorProto_ExtensionRange* other); + // implements Message ---------------------------------------------- DescriptorProto_ExtensionRange* New() const; @@ -450,6 +456,8 @@ class LIBPROTOBUF_EXPORT DescriptorProto : public ::google::protobuf::Message { static const ::google::protobuf::Descriptor* descriptor(); static const DescriptorProto& default_instance(); + void Swap(DescriptorProto* other); + // implements Message ---------------------------------------------- DescriptorProto* New() const; @@ -592,6 +600,8 @@ class LIBPROTOBUF_EXPORT FieldDescriptorProto : public ::google::protobuf::Messa static const ::google::protobuf::Descriptor* descriptor(); static const FieldDescriptorProto& default_instance(); + void Swap(FieldDescriptorProto* other); + // implements Message ---------------------------------------------- FieldDescriptorProto* New() const; @@ -780,6 +790,8 @@ class LIBPROTOBUF_EXPORT EnumDescriptorProto : public ::google::protobuf::Messag static const ::google::protobuf::Descriptor* descriptor(); static const EnumDescriptorProto& default_instance(); + void Swap(EnumDescriptorProto* other); 
+ // implements Message ---------------------------------------------- EnumDescriptorProto* New() const; @@ -880,6 +892,8 @@ class LIBPROTOBUF_EXPORT EnumValueDescriptorProto : public ::google::protobuf::M static const ::google::protobuf::Descriptor* descriptor(); static const EnumValueDescriptorProto& default_instance(); + void Swap(EnumValueDescriptorProto* other); + // implements Message ---------------------------------------------- EnumValueDescriptorProto* New() const; @@ -977,6 +991,8 @@ class LIBPROTOBUF_EXPORT ServiceDescriptorProto : public ::google::protobuf::Mes static const ::google::protobuf::Descriptor* descriptor(); static const ServiceDescriptorProto& default_instance(); + void Swap(ServiceDescriptorProto* other); + // implements Message ---------------------------------------------- ServiceDescriptorProto* New() const; @@ -1077,6 +1093,8 @@ class LIBPROTOBUF_EXPORT MethodDescriptorProto : public ::google::protobuf::Mess static const ::google::protobuf::Descriptor* descriptor(); static const MethodDescriptorProto& default_instance(); + void Swap(MethodDescriptorProto* other); + // implements Message ---------------------------------------------- MethodDescriptorProto* New() const; @@ -1187,6 +1205,8 @@ class LIBPROTOBUF_EXPORT FileOptions : public ::google::protobuf::Message { static const ::google::protobuf::Descriptor* descriptor(); static const FileOptions& default_instance(); + void Swap(FileOptions* other); + // implements Message ---------------------------------------------- FileOptions* New() const; @@ -1401,6 +1421,8 @@ class LIBPROTOBUF_EXPORT MessageOptions : public ::google::protobuf::Message { static const ::google::protobuf::Descriptor* descriptor(); static const MessageOptions& default_instance(); + void Swap(MessageOptions* other); + // implements Message ---------------------------------------------- MessageOptions* New() const; @@ -1573,6 +1595,8 @@ class LIBPROTOBUF_EXPORT FieldOptions : public ::google::protobuf::Message { 
static const ::google::protobuf::Descriptor* descriptor(); static const FieldOptions& default_instance(); + void Swap(FieldOptions* other); + // implements Message ---------------------------------------------- FieldOptions* New() const; @@ -1770,6 +1794,8 @@ class LIBPROTOBUF_EXPORT EnumOptions : public ::google::protobuf::Message { static const ::google::protobuf::Descriptor* descriptor(); static const EnumOptions& default_instance(); + void Swap(EnumOptions* other); + // implements Message ---------------------------------------------- EnumOptions* New() const; @@ -1935,6 +1961,8 @@ class LIBPROTOBUF_EXPORT EnumValueOptions : public ::google::protobuf::Message { static const ::google::protobuf::Descriptor* descriptor(); static const EnumValueOptions& default_instance(); + void Swap(EnumValueOptions* other); + // implements Message ---------------------------------------------- EnumValueOptions* New() const; @@ -2100,6 +2128,8 @@ class LIBPROTOBUF_EXPORT ServiceOptions : public ::google::protobuf::Message { static const ::google::protobuf::Descriptor* descriptor(); static const ServiceOptions& default_instance(); + void Swap(ServiceOptions* other); + // implements Message ---------------------------------------------- ServiceOptions* New() const; @@ -2265,6 +2295,8 @@ class LIBPROTOBUF_EXPORT MethodOptions : public ::google::protobuf::Message { static const ::google::protobuf::Descriptor* descriptor(); static const MethodOptions& default_instance(); + void Swap(MethodOptions* other); + // implements Message ---------------------------------------------- MethodOptions* New() const; @@ -2430,6 +2462,8 @@ class LIBPROTOBUF_EXPORT UninterpretedOption_NamePart : public ::google::protobu static const ::google::protobuf::Descriptor* descriptor(); static const UninterpretedOption_NamePart& default_instance(); + void Swap(UninterpretedOption_NamePart* other); + // implements Message ---------------------------------------------- UninterpretedOption_NamePart* New() const; 
@@ -2520,6 +2554,8 @@ class LIBPROTOBUF_EXPORT UninterpretedOption : public ::google::protobuf::Messag static const ::google::protobuf::Descriptor* descriptor(); static const UninterpretedOption& default_instance(); + void Swap(UninterpretedOption* other); + // implements Message ---------------------------------------------- UninterpretedOption* New() const; diff --git a/src/google/protobuf/descriptor.proto b/src/google/protobuf/descriptor.proto index 3f91c187..9cdd61c5 100644 --- a/src/google/protobuf/descriptor.proto +++ b/src/google/protobuf/descriptor.proto @@ -252,6 +252,7 @@ message FileOptions { } optional OptimizeMode optimize_for = 9 [default=CODE_SIZE]; + // The parser stores options it doesn't recognize here. See above. repeated UninterpretedOption uninterpreted_option = 999; diff --git a/src/google/protobuf/descriptor_unittest.cc b/src/google/protobuf/descriptor_unittest.cc index b7dac69a..3a2b52bd 100644 --- a/src/google/protobuf/descriptor_unittest.cc +++ b/src/google/protobuf/descriptor_unittest.cc @@ -1658,6 +1658,61 @@ TEST(CustomOptions, ComplexExtensionOptions) { EXPECT_EQ(24, options->GetExtension(protobuf_unittest::complexopt6).xyzzy()); } +TEST(CustomOptions, OptionsFromOtherFile) { + // Test that to use a custom option, we only need to import the file + // defining the option; we do not also have to import descriptor.proto. 
+ DescriptorPool pool; + + FileDescriptorProto file_proto; + FileDescriptorProto::descriptor()->file()->CopyTo(&file_proto); + ASSERT_TRUE(pool.BuildFile(file_proto) != NULL); + + protobuf_unittest::TestMessageWithCustomOptions::descriptor() + ->file()->CopyTo(&file_proto); + ASSERT_TRUE(pool.BuildFile(file_proto) != NULL); + + ASSERT_TRUE(TextFormat::ParseFromString( + "name: \"custom_options_import.proto\" " + "package: \"protobuf_unittest\" " + "dependency: \"google/protobuf/unittest_custom_options.proto\" " + "options { " + " uninterpreted_option { " + " name { " + " name_part: \"file_opt1\" " + " is_extension: true " + " } " + " positive_int_value: 1234 " + " } " + // Test a non-extension option too. (At one point this failed due to a + // bug.) + " uninterpreted_option { " + " name { " + " name_part: \"java_package\" " + " is_extension: false " + " } " + " string_value: \"foo\" " + " } " + // Test that enum-typed options still work too. (At one point this also + // failed due to a bug.) 
+ " uninterpreted_option { " + " name { " + " name_part: \"optimize_for\" " + " is_extension: false " + " } " + " identifier_value: \"SPEED\" " + " } " + "}" + , + &file_proto)); + + const FileDescriptor* file = pool.BuildFile(file_proto); + ASSERT_TRUE(file != NULL); + EXPECT_EQ(1234, file->options().GetExtension(protobuf_unittest::file_opt1)); + EXPECT_TRUE(file->options().has_java_package()); + EXPECT_EQ("foo", file->options().java_package()); + EXPECT_TRUE(file->options().has_optimize_for()); + EXPECT_EQ(FileOptions::SPEED, file->options().optimize_for()); +} // =================================================================== diff --git a/src/google/protobuf/dynamic_message.cc b/src/google/protobuf/dynamic_message.cc index d2e855b8..b969c13b 100644 --- a/src/google/protobuf/dynamic_message.cc +++ b/src/google/protobuf/dynamic_message.cc @@ -507,7 +507,8 @@ const Message* DynamicMessageFactory::GetPrototype(const Descriptor* type) { type_info->has_bits_offset, type_info->unknown_fields_offset, type_info->extensions_offset, - type_info->pool)); + type_info->pool, + type_info->size)); // Cross link prototypes. prototype->CrossLinkPrototypes(); diff --git a/src/google/protobuf/dynamic_message_unittest.cc b/src/google/protobuf/dynamic_message_unittest.cc index 2167475d..5f7af94e 100644 --- a/src/google/protobuf/dynamic_message_unittest.cc +++ b/src/google/protobuf/dynamic_message_unittest.cc @@ -127,5 +127,20 @@ TEST_F(DynamicMessageTest, Extensions) { reflection_tester.ExpectAllFieldsSetViaReflection(*message); } +TEST_F(DynamicMessageTest, SpaceUsed) { + // Test that SpaceUsed() works properly + + // Since we share the implementation with generated messages, we don't need + // to test very much here. Just make sure it appears to be working. 
+ + scoped_ptr<Message> message(prototype_->New()); + TestUtil::ReflectionTester reflection_tester(descriptor_); + + int initial_space_used = message->SpaceUsed(); + + reflection_tester.SetAllFieldsViaReflection(message.get()); + EXPECT_LT(initial_space_used, message->SpaceUsed()); +} + } // namespace protobuf } // namespace google diff --git a/src/google/protobuf/extension_set.cc b/src/google/protobuf/extension_set.cc index 0b59c432..f431cedc 100644 --- a/src/google/protobuf/extension_set.cc +++ b/src/google/protobuf/extension_set.cc @@ -40,6 +40,7 @@ #include <google/protobuf/io/coded_stream.h> #include <google/protobuf/wire_format.h> #include <google/protobuf/repeated_field.h> +#include <google/protobuf/generated_message_reflection.h> namespace google { namespace protobuf { @@ -515,6 +516,13 @@ void ExtensionSet::MergeFrom(const ExtensionSet& other) { } } +void ExtensionSet::Swap(ExtensionSet* x) { + extensions_.swap(x->extensions_); + std::swap(extendee_, x->extendee_); + std::swap(descriptor_pool_, x->descriptor_pool_); + std::swap(message_factory_, x->message_factory_); +} + bool ExtensionSet::IsInitialized() const { // Extensions are never requried. However, we need to check that all // embedded messages are initialized. 
@@ -575,6 +583,18 @@ int ExtensionSet::ByteSize(const Message& message) const { return total_size; } +int ExtensionSet::SpaceUsedExcludingSelf() const { + int total_size = + extensions_.size() * sizeof(map<int, Extension>::value_type); + for (map<int, Extension>::const_iterator iter = extensions_.begin(), + end = extensions_.end(); + iter != end; + ++iter) { + total_size += iter->second.SpaceUsedExcludingSelf(); + } + return total_size; +} + // =================================================================== // Methods of ExtensionSet::Extension @@ -712,6 +732,44 @@ void ExtensionSet::Extension::Free() { } } +int ExtensionSet::Extension::SpaceUsedExcludingSelf() const { + int total_size = 0; + if (descriptor->is_repeated()) { + switch (descriptor->cpp_type()) { +#define HANDLE_TYPE(UPPERCASE, LOWERCASE) \ + case FieldDescriptor::CPPTYPE_##UPPERCASE: \ + total_size += sizeof(*repeated_##LOWERCASE##_value) + \ + repeated_##LOWERCASE##_value->SpaceUsedExcludingSelf();\ + break + + HANDLE_TYPE( INT32, int32); + HANDLE_TYPE( INT64, int64); + HANDLE_TYPE( UINT32, uint32); + HANDLE_TYPE( UINT64, uint64); + HANDLE_TYPE( FLOAT, float); + HANDLE_TYPE( DOUBLE, double); + HANDLE_TYPE( BOOL, bool); + HANDLE_TYPE( ENUM, enum); + HANDLE_TYPE( STRING, string); + HANDLE_TYPE(MESSAGE, message); + } + } else { + switch (descriptor->cpp_type()) { + case FieldDescriptor::CPPTYPE_STRING: + total_size += sizeof(*string_value) + + StringSpaceUsedExcludingSelf(*string_value); + break; + case FieldDescriptor::CPPTYPE_MESSAGE: + total_size += message_value->SpaceUsed(); + break; + default: + // No extra storage costs for primitive types. 
+ break; + } + } + return total_size; +} + } // namespace internal } // namespace protobuf } // namespace google diff --git a/src/google/protobuf/extension_set.h b/src/google/protobuf/extension_set.h index 1acc2c5c..9ad241f2 100644 --- a/src/google/protobuf/extension_set.h +++ b/src/google/protobuf/extension_set.h @@ -209,6 +209,7 @@ class LIBPROTOBUF_EXPORT ExtensionSet { void Clear(); void MergeFrom(const ExtensionSet& other); + void Swap(ExtensionSet* other); bool IsInitialized() const; // These parsing and serialization functions all want a pointer to the @@ -234,6 +235,10 @@ class LIBPROTOBUF_EXPORT ExtensionSet { // Returns the total serialized size of all the extensions. int ByteSize(const Message& message) const; + // Returns (an estimate of) the total number of bytes used for storing the + // extensions in memory, excluding sizeof(*this). + int SpaceUsedExcludingSelf() const; + private: // Like FindKnownExtension(), but GOOGLE_CHECK-fail if not found. const FieldDescriptor* FindKnownExtensionOrDie(int number) const; @@ -286,6 +291,7 @@ class LIBPROTOBUF_EXPORT ExtensionSet { void Clear(); int GetSize() const; void Free(); + int SpaceUsedExcludingSelf() const; }; // The Extension struct is small enough to be passed by value, so we use it diff --git a/src/google/protobuf/extension_set_unittest.cc b/src/google/protobuf/extension_set_unittest.cc index bcd14f92..c3ac7ce7 100644 --- a/src/google/protobuf/extension_set_unittest.cc +++ b/src/google/protobuf/extension_set_unittest.cc @@ -136,6 +136,36 @@ TEST(ExtensionSetTest, CopyFrom) { TestUtil::ExpectAllExtensionsSet(message2); } +TEST(ExtensionSetTest, CopyFromUpcasted) { + unittest::TestAllExtensions message1, message2; + string data; + const Message& upcasted_message = message1; + + TestUtil::SetAllExtensions(&message1); + message2.CopyFrom(upcasted_message); + TestUtil::ExpectAllExtensionsSet(message2); +} + +TEST(ExtensionSetTest, SwapWithEmpty) { + unittest::TestAllExtensions message1, message2; + 
TestUtil::SetAllExtensions(&message1); + + TestUtil::ExpectAllExtensionsSet(message1); + TestUtil::ExpectExtensionsClear(message2); + message1.Swap(&message2); + TestUtil::ExpectAllExtensionsSet(message2); + TestUtil::ExpectExtensionsClear(message1); +} + +TEST(ExtensionSetTest, SwapWithSelf) { + unittest::TestAllExtensions message; + TestUtil::SetAllExtensions(&message); + + TestUtil::ExpectAllExtensionsSet(message); + message.Swap(&message); + TestUtil::ExpectAllExtensionsSet(message); +} + TEST(ExtensionSetTest, Serialization) { // Serialize as TestAllExtensions and parse as TestAllTypes to insure wire // compatibility of extensions. @@ -203,6 +233,143 @@ TEST(ExtensionSetTest, MutableString) { message.GetExtension(unittest::repeated_string_extension, 0)); } +TEST(ExtensionSetTest, SpaceUsedExcludingSelf) { + // Scalar primitive extensions should increase the extension set size by a + // minimum of the size of the primitive type. +#define TEST_SCALAR_EXTENSIONS_SPACE_USED(type, value) \ + do { \ + unittest::TestAllExtensions message; \ + const int base_size = message.SpaceUsed(); \ + message.SetExtension(unittest::optional_##type##_extension, value); \ + int min_expected_size = base_size + \ + sizeof(message.GetExtension(unittest::optional_##type##_extension)); \ + EXPECT_LE(min_expected_size, message.SpaceUsed()); \ + } while (0) + + TEST_SCALAR_EXTENSIONS_SPACE_USED(int32 , 101); + TEST_SCALAR_EXTENSIONS_SPACE_USED(int64 , 102); + TEST_SCALAR_EXTENSIONS_SPACE_USED(uint32 , 103); + TEST_SCALAR_EXTENSIONS_SPACE_USED(uint64 , 104); + TEST_SCALAR_EXTENSIONS_SPACE_USED(sint32 , 105); + TEST_SCALAR_EXTENSIONS_SPACE_USED(sint64 , 106); + TEST_SCALAR_EXTENSIONS_SPACE_USED(fixed32 , 107); + TEST_SCALAR_EXTENSIONS_SPACE_USED(fixed64 , 108); + TEST_SCALAR_EXTENSIONS_SPACE_USED(sfixed32, 109); + TEST_SCALAR_EXTENSIONS_SPACE_USED(sfixed64, 110); + TEST_SCALAR_EXTENSIONS_SPACE_USED(float , 111); + TEST_SCALAR_EXTENSIONS_SPACE_USED(double , 112); + 
+ TEST_SCALAR_EXTENSIONS_SPACE_USED(bool , true); +#undef TEST_SCALAR_EXTENSIONS_SPACE_USED + { + unittest::TestAllExtensions message; + const int base_size = message.SpaceUsed(); + message.SetExtension(unittest::optional_nested_enum_extension, + unittest::TestAllTypes::FOO); + int min_expected_size = base_size + + sizeof(message.GetExtension(unittest::optional_nested_enum_extension)); + EXPECT_LE(min_expected_size, message.SpaceUsed()); + } + { + // Strings may cause extra allocations depending on their length; ensure + // that gets included as well. + unittest::TestAllExtensions message; + const int base_size = message.SpaceUsed(); + const string s("this is a fairly large string that will cause some " + "allocation in order to store it in the extension"); + message.SetExtension(unittest::optional_string_extension, s); + int min_expected_size = base_size + s.length(); + EXPECT_LE(min_expected_size, message.SpaceUsed()); + } + { + // Messages also have additional allocations that need to be counted. + unittest::TestAllExtensions message; + const int base_size = message.SpaceUsed(); + unittest::ForeignMessage foreign; + foreign.set_c(42); + message.MutableExtension(unittest::optional_foreign_message_extension)-> + CopyFrom(foreign); + int min_expected_size = base_size + foreign.SpaceUsed(); + EXPECT_LE(min_expected_size, message.SpaceUsed()); + } + + // Repeated primitive extensions will increase space used by at least a + // RepeatedField<T>, and will cause additional allocations when the array + // gets too big for the initial space. + // This macro: + // - Adds a value to the repeated extension, then clears it, establishing + // the base size. 
+ // - Adds a small number of values, testing that it doesn't increase the + // SpaceUsed() + // - Adds a large number of values (requiring allocation in the repeated + // field), and ensures that that allocation is included in SpaceUsed() +#define TEST_REPEATED_EXTENSIONS_SPACE_USED(type, cpptype, value) \ + do { \ + unittest::TestAllExtensions message; \ + const int base_size = message.SpaceUsed(); \ + int min_expected_size = sizeof(RepeatedField<cpptype>) + base_size; \ + message.AddExtension(unittest::repeated_##type##_extension, value); \ + message.ClearExtension(unittest::repeated_##type##_extension); \ + const int empty_repeated_field_size = message.SpaceUsed(); \ + EXPECT_LE(min_expected_size, empty_repeated_field_size) << #type; \ + message.AddExtension(unittest::repeated_##type##_extension, value); \ + message.AddExtension(unittest::repeated_##type##_extension, value); \ + EXPECT_EQ(empty_repeated_field_size, message.SpaceUsed()) << #type; \ + message.ClearExtension(unittest::repeated_##type##_extension); \ + for (int i = 0; i < 16; ++i) { \ + message.AddExtension(unittest::repeated_##type##_extension, value); \ + } \ + int expected_size = sizeof(cpptype) * 16 + empty_repeated_field_size; \ + EXPECT_EQ(expected_size, message.SpaceUsed()) << #type; \ + } while (0) + + TEST_REPEATED_EXTENSIONS_SPACE_USED(int32 , int32 , 101); + TEST_REPEATED_EXTENSIONS_SPACE_USED(int64 , int64 , 102); + TEST_REPEATED_EXTENSIONS_SPACE_USED(uint32 , uint32, 103); + TEST_REPEATED_EXTENSIONS_SPACE_USED(uint64 , uint64, 104); + TEST_REPEATED_EXTENSIONS_SPACE_USED(sint32 , int32 , 105); + TEST_REPEATED_EXTENSIONS_SPACE_USED(sint64 , int64 , 106); + TEST_REPEATED_EXTENSIONS_SPACE_USED(fixed32 , uint32, 107); + TEST_REPEATED_EXTENSIONS_SPACE_USED(fixed64 , uint64, 108); + TEST_REPEATED_EXTENSIONS_SPACE_USED(sfixed32, int32 , 109); + TEST_REPEATED_EXTENSIONS_SPACE_USED(sfixed64, int64 , 110); + TEST_REPEATED_EXTENSIONS_SPACE_USED(float , float , 111); + 
TEST_REPEATED_EXTENSIONS_SPACE_USED(double , double, 112); + TEST_REPEATED_EXTENSIONS_SPACE_USED(bool , bool , true); + TEST_REPEATED_EXTENSIONS_SPACE_USED(nested_enum, int, + unittest::TestAllTypes::FOO); +#undef TEST_REPEATED_EXTENSIONS_SPACE_USED + // Repeated strings + { + unittest::TestAllExtensions message; + const int base_size = message.SpaceUsed(); + int min_expected_size = sizeof(RepeatedPtrField<string>) + base_size; + const string value(256, 'x'); + // Once items are allocated, they may stick around even when cleared so + // without the hardcore memory management accessors there isn't a notion of + // the empty repeated field memory usage as there is with primitive types. + for (int i = 0; i < 16; ++i) { + message.AddExtension(unittest::repeated_string_extension, value); + } + min_expected_size += (sizeof(value) + value.size()) * 16; + EXPECT_LE(min_expected_size, message.SpaceUsed()); + } + // Repeated messages + { + unittest::TestAllExtensions message; + const int base_size = message.SpaceUsed(); + int min_expected_size = sizeof(RepeatedPtrField<unittest::ForeignMessage>) + + base_size; + unittest::ForeignMessage prototype; + prototype.set_c(2); + for (int i = 0; i < 16; ++i) { + message.AddExtension(unittest::repeated_foreign_message_extension)-> + CopyFrom(prototype); + } + min_expected_size += 16 * prototype.SpaceUsed(); + EXPECT_LE(min_expected_size, message.SpaceUsed()); + } +} + } // namespace } // namespace internal } // namespace protobuf diff --git a/src/google/protobuf/generated_message_reflection.cc b/src/google/protobuf/generated_message_reflection.cc index 7f05709e..085af8f9 100644 --- a/src/google/protobuf/generated_message_reflection.cc +++ b/src/google/protobuf/generated_message_reflection.cc @@ -46,6 +46,18 @@ namespace internal { namespace { const string kEmptyString; } +int StringSpaceUsedExcludingSelf(const string& str) { + const void* start = &str; + const void* end = &str + 1; + + if (start <= str.data() && str.data() <= end) { + 
// The string's data is stored inside the string object itself. + return 0; + } else { + return str.capacity(); + } +} + // =================================================================== // Helpers for reporting usage errors (e.g. trying to use GetInt32() on // a string field). @@ -147,13 +159,15 @@ GeneratedMessageReflection::GeneratedMessageReflection( int has_bits_offset, int unknown_fields_offset, int extensions_offset, - const DescriptorPool* descriptor_pool) + const DescriptorPool* descriptor_pool, + int object_size) : descriptor_ (descriptor), default_instance_ (default_instance), offsets_ (offsets), has_bits_offset_ (has_bits_offset), unknown_fields_offset_(unknown_fields_offset), extensions_offset_(extensions_offset), + object_size_ (object_size), descriptor_pool_ ((descriptor_pool == NULL) ? DescriptorPool::generated_pool() : descriptor_pool) { @@ -173,6 +187,71 @@ UnknownFieldSet* GeneratedMessageReflection::MutableUnknownFields( return reinterpret_cast<UnknownFieldSet*>(ptr); } +int GeneratedMessageReflection::SpaceUsed(const Message& message) const { + // object_size_ already includes the in-memory representation of each field + // in the message, so we only need to account for additional memory used by + // the fields. 
+ int total_size = object_size_; + + total_size += GetUnknownFields(message).SpaceUsedExcludingSelf(); + + if (extensions_offset_ != -1) { + total_size += GetExtensionSet(message).SpaceUsedExcludingSelf(); + } + + for (int i = 0; i < descriptor_->field_count(); i++) { + const FieldDescriptor* field = descriptor_->field(i); + + if (field->is_repeated()) { + total_size += GetRaw<GenericRepeatedField>(message, field) + .GenericSpaceUsedExcludingSelf(); + } else { + switch (field->cpp_type()) { + case FieldDescriptor::CPPTYPE_INT32 : + case FieldDescriptor::CPPTYPE_INT64 : + case FieldDescriptor::CPPTYPE_UINT32: + case FieldDescriptor::CPPTYPE_UINT64: + case FieldDescriptor::CPPTYPE_DOUBLE: + case FieldDescriptor::CPPTYPE_FLOAT : + case FieldDescriptor::CPPTYPE_BOOL : + case FieldDescriptor::CPPTYPE_ENUM : + // Field is inline, so we've already counted it. + break; + + case FieldDescriptor::CPPTYPE_STRING: { + const string* ptr = GetField<const string*>(message, field); + + // Initially, the string points to the default value stored in + // the prototype. Only count the string if it has been changed + // from the default value. + const string* default_ptr = DefaultRaw<const string*>(field); + + if (ptr != default_ptr) { + // string fields are represented by just a pointer, so also + // include sizeof(string) as well. + total_size += sizeof(*ptr) + StringSpaceUsedExcludingSelf(*ptr); + } + break; + } + + case FieldDescriptor::CPPTYPE_MESSAGE: + if (&message == default_instance_) { + // For singular fields, the prototype just stores a pointer to the + // external type's prototype, so there is no extra memory usage. 
+ } else { + const Message* sub_message = GetRaw<const Message*>(message, field); + if (sub_message != NULL) { + total_size += sub_message->SpaceUsed(); + } + } + break; + } + } + } + + return total_size; +} + // ------------------------------------------------------------------- bool GeneratedMessageReflection::HasField(const Message& message, @@ -765,6 +844,7 @@ inline Type* GeneratedMessageReflection::AddField( return reinterpret_cast<Type*>(repeated->GenericAdd()); } + } // namespace internal } // namespace protobuf } // namespace google diff --git a/src/google/protobuf/generated_message_reflection.h b/src/google/protobuf/generated_message_reflection.h index 1fabb732..34383df8 100644 --- a/src/google/protobuf/generated_message_reflection.h +++ b/src/google/protobuf/generated_message_reflection.h @@ -116,13 +116,16 @@ class LIBPROTOBUF_EXPORT GeneratedMessageReflection : public Reflection { // pool: DescriptorPool to search for extension definitions. Only // used by FindKnownExtensionByName() and // FindKnownExtensionByNumber(). + // object_size: The size of a message object of this type, as measured + // by sizeof(). 
GeneratedMessageReflection(const Descriptor* descriptor, const Message* default_instance, const int offsets[], int has_bits_offset, int unknown_fields_offset, int extensions_offset, - const DescriptorPool* pool); + const DescriptorPool* pool, + int object_size); ~GeneratedMessageReflection(); // implements Reflection ------------------------------------------- @@ -130,6 +133,8 @@ class LIBPROTOBUF_EXPORT GeneratedMessageReflection : public Reflection { const UnknownFieldSet& GetUnknownFields(const Message& message) const; UnknownFieldSet* MutableUnknownFields(Message* message) const; + int SpaceUsed(const Message& message) const; + bool HasField(const Message& message, const FieldDescriptor* field) const; int FieldSize(const Message& message, const FieldDescriptor* field) const; void ClearField(Message* message, const FieldDescriptor* field) const; @@ -266,6 +271,7 @@ class LIBPROTOBUF_EXPORT GeneratedMessageReflection : public Reflection { int has_bits_offset_; int unknown_fields_offset_; int extensions_offset_; + int object_size_; const DescriptorPool* descriptor_pool_; @@ -374,6 +380,11 @@ inline To dynamic_cast_if_available(From from) { #endif } +// Compute the space used by a string, not including sizeof(string) itself. +// This is slightly complicated because small strings store their data within +// the string object but large strings do not. +int StringSpaceUsedExcludingSelf(const string& str); + } // namespace internal } // namespace protobuf diff --git a/src/google/protobuf/io/tokenizer.cc b/src/google/protobuf/io/tokenizer.cc index 8c12fac5..4823912a 100644 --- a/src/google/protobuf/io/tokenizer.cc +++ b/src/google/protobuf/io/tokenizer.cc @@ -623,19 +623,17 @@ double Tokenizer::ParseFloat(const string& text) { return result; } -void Tokenizer::ParseString(const string& text, string* output) { - output->clear(); - +void Tokenizer::ParseStringAppend(const string& text, string* output) { // Reminder: text[0] is always the quote character. 
(If text is // empty, it's invalid, so we'll just return.) if (text.empty()) { GOOGLE_LOG(DFATAL) - << " ParseString::ParseString() passed text that could not have been" - " tokenized as a string: " << CEscape(text); + << " Tokenizer::ParseStringAppend() passed text that could not" + " have been tokenized as a string: " << CEscape(text); return; } - output->reserve(text.size()); + output->reserve(output->size() + text.size()); // Loop through the string copying characters to "output" and // interpreting escape sequences. Note that any invalid escape diff --git a/src/google/protobuf/io/tokenizer.h b/src/google/protobuf/io/tokenizer.h index 048a588f..98386e0b 100644 --- a/src/google/protobuf/io/tokenizer.h +++ b/src/google/protobuf/io/tokenizer.h @@ -139,6 +139,9 @@ class LIBPROTOBUF_EXPORT Tokenizer { // result is undefined (possibly an assert failure). static void ParseString(const string& text, string* output); + // Identical to ParseString, but appends to output. + static void ParseStringAppend(const string& text, string* output); + // Parses a TYPE_INTEGER token. Returns false if the result would be // greater than max_value. Otherwise, returns true and sets *output to the // result. If the text is not from a Token of type TYPE_INTEGER originally @@ -283,6 +286,11 @@ inline const Tokenizer::Token& Tokenizer::current() { return current_; } +inline void Tokenizer::ParseString(const string& text, string* output) { + output->clear(); + ParseStringAppend(text, output); +} + } // namespace io } // namespace protobuf diff --git a/src/google/protobuf/io/tokenizer_unittest.cc b/src/google/protobuf/io/tokenizer_unittest.cc index c0f5aefd..dd7ed5c8 100644 --- a/src/google/protobuf/io/tokenizer_unittest.cc +++ b/src/google/protobuf/io/tokenizer_unittest.cc @@ -584,6 +584,15 @@ TEST_F(TokenizerTest, ParseString) { #endif // GTEST_HAS_DEATH_TEST } +TEST_F(TokenizerTest, ParseStringAppend) { + // Check that ParseString and ParseStringAppend differ. 
+ string output("stuff+"); + Tokenizer::ParseStringAppend("'hello'", &output); + EXPECT_EQ("stuff+hello", output); + Tokenizer::ParseString("'hello'", &output); + EXPECT_EQ("hello", output); +} + // ------------------------------------------------------------------- // Each case parses some input text, ignoring the tokens produced, and diff --git a/src/google/protobuf/io/zero_copy_stream_unittest.cc b/src/google/protobuf/io/zero_copy_stream_unittest.cc index ec92127d..2ee9e6c4 100644 --- a/src/google/protobuf/io/zero_copy_stream_unittest.cc +++ b/src/google/protobuf/io/zero_copy_stream_unittest.cc @@ -156,7 +156,9 @@ int IoTest::ReadFromInput(ZeroCopyInputStream* input, void* data, int size) { if (out_size <= in_size) { memcpy(out, in, out_size); - input->BackUp(in_size - out_size); + if (in_size > out_size) { + input->BackUp(in_size - out_size); + } return size; // Copied all of it. } diff --git a/src/google/protobuf/message.cc b/src/google/protobuf/message.cc index fddbdcb8..f6c932ff 100644 --- a/src/google/protobuf/message.cc +++ b/src/google/protobuf/message.cc @@ -204,6 +204,10 @@ void Message::SetCachedSize(int size) const { "Must implement one or the other."; } +int Message::SpaceUsed() const { + return GetReflection()->SpaceUsed(*this); +} + bool Message::SerializeToCodedStream(io::CodedOutputStream* output) const { GOOGLE_DCHECK(IsInitialized()) << InitializationErrorMessage("serialize", *this); return SerializePartialToCodedStream(output); @@ -291,6 +295,24 @@ bool Message::SerializePartialToOstream(ostream* output) const { } +string Message::SerializeAsString() const { + // If the compiler implements the (Named) Return Value Optimization, + // the local variable 'output' will not actually reside on the stack + // of this function, but will be overlaid with the object that the + // caller supplied for the return value to be constructed in. 
+ string output; + if (!AppendToString(&output)) + output.clear(); + return output; +} + +string Message::SerializePartialAsString() const { + string output; + if (!AppendPartialToString(&output)) + output.clear(); + return output; +} + Reflection::~Reflection() {} // =================================================================== diff --git a/src/google/protobuf/message.h b/src/google/protobuf/message.h index cd25faa2..d96fcc60 100644 --- a/src/google/protobuf/message.h +++ b/src/google/protobuf/message.h @@ -95,7 +95,7 @@ // foo->ParseFromString(data); // // // Use the reflection interface to examine the contents. -// Reflection* reflection = foo->GetReflection(); +// const Reflection* reflection = foo->GetReflection(); // assert(reflection->GetString(foo, text_field) == "Hello World!"); // assert(reflection->CountField(foo, numbers_field) == 3); // assert(reflection->GetInt32(foo, numbers_field, 0) == 1); @@ -315,6 +315,16 @@ class LIBPROTOBUF_EXPORT Message { bool SerializePartialToOstream(ostream* output) const; + // Make a string encoding the message. Is equivalent to calling + // SerializeToString() on a string and using that. Returns the empty + // string if SerializeToString() would have returned an error. + // Note: If you intend to generate many such strings, you may + // reduce heap fragmentation by instead re-using the same string + // object with calls to SerializeToString(). + string SerializeAsString() const; + // Like SerializeAsString(), but allows missing required fields. + string SerializePartialAsString() const; + // Like SerializeToString(), but appends to the data to the string's existing // contents. All required fields must be set. bool AppendToString(string* output) const; @@ -326,6 +336,11 @@ class LIBPROTOBUF_EXPORT Message { // this, it MUST override SetCachedSize(). virtual int ByteSize() const; + // Computes (an estimate of) the total number of bytes currently used for + // storing the message in memory. 
The default implementation calls the + // Reflection object's SpaceUsed() method. + virtual int SpaceUsed() const; + // Serializes the message without recomputing the size. The message must // not have changed since the last call to ByteSize(); if it has, the results // are undefined. @@ -432,6 +447,9 @@ class LIBPROTOBUF_EXPORT Reflection { // recognized according to the Message's definition. virtual UnknownFieldSet* MutableUnknownFields(Message* message) const = 0; + // Estimate the amount of memory used by the message object. + virtual int SpaceUsed(const Message& message) const = 0; + // Check if the given non-repeated field is set. virtual bool HasField(const Message& message, const FieldDescriptor* field) const = 0; diff --git a/src/google/protobuf/message_unittest.cc b/src/google/protobuf/message_unittest.cc index 4d72ac26..da99741b 100644 --- a/src/google/protobuf/message_unittest.cc +++ b/src/google/protobuf/message_unittest.cc @@ -91,6 +91,8 @@ TEST(MessageTest, SerializeHelpers) { string temp = stream.str(); EXPECT_TRUE(temp == str1); + EXPECT_TRUE(message.SerializeAsString() == str1); + } TEST(MessageTest, ParseFromFileDescriptor) { diff --git a/src/google/protobuf/repeated_field.h b/src/google/protobuf/repeated_field.h index 203aa9bf..44cd875b 100644 --- a/src/google/protobuf/repeated_field.h +++ b/src/google/protobuf/repeated_field.h @@ -87,10 +87,14 @@ class LIBPROTOBUF_EXPORT GenericRepeatedField { virtual void* GenericAdd() = 0; virtual void GenericClear() = 0; virtual int GenericSize() const = 0; + virtual int GenericSpaceUsedExcludingSelf() const = 0; GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(GenericRepeatedField); }; +// We need this (from generated_message_reflection.cc). 
+int StringSpaceUsedExcludingSelf(const string& str); + } // namespace internal // RepeatedField is used to represent repeated fields of a primitive type (in @@ -140,6 +144,10 @@ class RepeatedField : public internal::GenericRepeatedField { iterator end(); const_iterator end() const; + // Returns the number of bytes used by the repeated field, excluding + // sizeof(*this) + int SpaceUsedExcludingSelf() const; + private: // See GenericRepeatedField for why this is private. // implements GenericRepeatedField --------------------------------- const void* GenericGet(int index) const; @@ -147,6 +155,7 @@ class RepeatedField : public internal::GenericRepeatedField { void* GenericAdd(); void GenericClear(); int GenericSize() const; + int GenericSpaceUsedExcludingSelf() const; private: GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(RepeatedField); @@ -214,6 +223,10 @@ class RepeatedPtrField : public internal::GenericRepeatedField { iterator end(); const_iterator end() const; + // Returns (an estimate of) the number of bytes used by the repeated field, + // excluding sizeof(*this). + int SpaceUsedExcludingSelf() const; + // Advanced memory management -------------------------------------- // When hardcore memory management becomes necessary -- as it often // does here at Google -- the following methods may be useful. @@ -254,8 +267,13 @@ class RepeatedPtrField : public internal::GenericRepeatedField { void* GenericAdd(); void GenericClear(); int GenericSize() const; + int GenericSpaceUsedExcludingSelf() const; private: + // Returns (an estimate of) the number of bytes used by an individual + // element. + int ElementSpaceUsed(Element* element) const; + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(RepeatedPtrField); static const int kInitialSize = 4; @@ -398,6 +416,10 @@ RepeatedField<Element>::end() const { return elements_ + current_size_; } +template <typename Element> +inline int RepeatedField<Element>::SpaceUsedExcludingSelf() const { + return (elements_ != initial_space_) ? 
total_size_ * sizeof(elements_[0]) : 0; +} template <typename Element> const void* RepeatedField<Element>::GenericGet(int index) const { @@ -427,6 +449,11 @@ int RepeatedField<Element>::GenericSize() const { } template <typename Element> +int RepeatedField<Element>::GenericSpaceUsedExcludingSelf() const { + return SpaceUsedExcludingSelf(); +} + +template <typename Element> inline void RepeatedField<Element>::Reserve(int new_size) { if (total_size_ >= new_size) return; @@ -595,6 +622,26 @@ void RepeatedPtrField<Element>::Swap(RepeatedPtrField* other) { } } +template <typename Element> +inline int RepeatedPtrField<Element>::SpaceUsedExcludingSelf() const { + int allocated_bytes = + (elements_ != initial_space_) ? total_size_ * sizeof(elements_[0]) : 0; + for (int i = 0; i < allocated_size_; ++i) { + allocated_bytes += ElementSpaceUsed(elements_[i]); + } + return allocated_bytes; +} + +template <typename Element> +inline int RepeatedPtrField<Element>::ElementSpaceUsed(Element* e) const { + return e->SpaceUsed(); +} + +template <> +inline int RepeatedPtrField<string>::ElementSpaceUsed(string* s) const { + return sizeof(*s) + internal::StringSpaceUsedExcludingSelf(*s); +} + template <typename Element> inline void RepeatedPtrField<Element>::AddAllocated(Element* value) { @@ -665,6 +712,11 @@ int RepeatedPtrField<Element>::GenericSize() const { return size(); } +template <typename Element> +int RepeatedPtrField<Element>::GenericSpaceUsedExcludingSelf() const { + return SpaceUsedExcludingSelf(); +} + template <typename Element> inline void RepeatedPtrField<Element>::Reserve(int new_size) { diff --git a/src/google/protobuf/repeated_field_unittest.cc b/src/google/protobuf/repeated_field_unittest.cc index 3483fdbb..3ab97620 100644 --- a/src/google/protobuf/repeated_field_unittest.cc +++ b/src/google/protobuf/repeated_field_unittest.cc @@ -69,6 +69,7 @@ TEST(RepeatedField, Small) { EXPECT_EQ(field.size(), 2); EXPECT_EQ(field.Get(0), 5); EXPECT_EQ(field.Get(1), 23); + 
EXPECT_EQ(field.SpaceUsedExcludingSelf(), 0); field.RemoveLast(); @@ -78,6 +79,7 @@ TEST(RepeatedField, Small) { field.Clear(); EXPECT_EQ(field.size(), 0); + EXPECT_EQ(field.SpaceUsedExcludingSelf(), 0); } // Test operations on a RepeatedField which is large enough to allocate a @@ -94,6 +96,9 @@ TEST(RepeatedField, Large) { for (int i = 0; i < 16; i++) { EXPECT_EQ(field.Get(i), i * i); } + + int expected_usage = 16 * sizeof(int); + EXPECT_GE(field.SpaceUsedExcludingSelf(), expected_usage); } // Test swapping between various types of RepeatedFields. @@ -278,6 +283,9 @@ TEST(RepeatedPtrField, Large) { EXPECT_EQ(field.Get(i).size(), 1); EXPECT_EQ(field.Get(i)[0], 'a' + i); } + + int min_expected_usage = 16 * sizeof(string); + EXPECT_GE(field.SpaceUsedExcludingSelf(), min_expected_usage); } TEST(RepeatedPtrField, SwapSmallSmall) { diff --git a/src/google/protobuf/stubs/common.h b/src/google/protobuf/stubs/common.h index 858d97fa..fbd2231e 100644 --- a/src/google/protobuf/stubs/common.h +++ b/src/google/protobuf/stubs/common.h @@ -1071,6 +1071,12 @@ template<typename T> struct remove_pointer<T* volatile> { typedef T type; }; template<typename T> struct remove_pointer<T* const volatile> { typedef T type; }; +// =================================================================== + +// Checks if the buffer contains structurally-valid UTF-8. Implemented in +// structurally_valid.cc. +bool IsStructurallyValidUTF8(const char* buf, int len); + } // namespace internal } // namespace protobuf diff --git a/src/google/protobuf/stubs/structurally_valid.cc b/src/google/protobuf/stubs/structurally_valid.cc new file mode 100644 index 00000000..e385a81e --- /dev/null +++ b/src/google/protobuf/stubs/structurally_valid.cc @@ -0,0 +1,521 @@ +// Copyright 2005-2008 Google Inc. All Rights Reserved. 
+// Author: jrm@google.com (Jim Meehan) + +#include <google/protobuf/stubs/common.h> + +namespace google { +namespace protobuf { +namespace internal { + +// These four-byte entries compactly encode how many bytes 0..255 to delete +// in making a string replacement, how many bytes to add 0..255, and the offset +// 0..64k-1 of the replacement string in remap_string. +struct RemapEntry { + uint8 delete_bytes; + uint8 add_bytes; + uint16 bytes_offset; +}; + +// Exit type codes for state tables. All but the first get stuffed into +// signed one-byte entries. The first is only generated by executable code. +// To distinguish from next-state entries, these must be contiguous and +// all <= kExitNone +typedef enum { + kExitDstSpaceFull = 239, + kExitIllegalStructure, // 240 + kExitOK, // 241 + kExitReject, // ... + kExitReplace1, + kExitReplace2, + kExitReplace3, + kExitReplace21, + kExitReplace31, + kExitReplace32, + kExitReplaceOffset1, + kExitReplaceOffset2, + kExitReplace1S0, + kExitSpecial, + kExitDoAgain, + kExitRejectAlt, + kExitNone // 255 +} ExitReason; + + +// This struct represents one entire state table. The three initialized byte +// areas are state_table, remap_base, and remap_string. state0 and state0_size +// give the byte offset and length within state_table of the initial state -- +// table lookups are expected to start and end in this state, but for +// truncated UTF-8 strings, may end in a different state. These allow a quick +// test for that condition. entry_shift is 8 for tables subscripted by a full +// byte value and 6 for space-optimized tables subscripted by only six +// significant bits in UTF-8 continuation bytes. 
+typedef struct { + const uint32 state0; + const uint32 state0_size; + const uint32 total_size; + const int max_expand; + const int entry_shift; + const int bytes_per_entry; + const uint32 losub; + const uint32 hiadd; + const uint8* state_table; + const RemapEntry* remap_base; + const uint8* remap_string; + const uint8* fast_state; +} UTF8StateMachineObj; + +typedef UTF8StateMachineObj UTF8ScanObj; + +#define X__ (kExitIllegalStructure) +#define RJ_ (kExitReject) +#define S1_ (kExitReplace1) +#define S2_ (kExitReplace2) +#define S3_ (kExitReplace3) +#define S21 (kExitReplace21) +#define S31 (kExitReplace31) +#define S32 (kExitReplace32) +#define T1_ (kExitReplaceOffset1) +#define T2_ (kExitReplaceOffset2) +#define S11 (kExitReplace1S0) +#define SP_ (kExitSpecial) +#define D__ (kExitDoAgain) +#define RJA (kExitRejectAlt) + +// Entire table has 9 state blocks of 256 entries each +static const unsigned int utf8acceptnonsurrogates_STATE0 = 0; // state[0] +static const unsigned int utf8acceptnonsurrogates_STATE0_SIZE = 256; // =[1] +static const unsigned int utf8acceptnonsurrogates_TOTAL_SIZE = 2304; +static const unsigned int utf8acceptnonsurrogates_MAX_EXPAND_X4 = 0; +static const unsigned int utf8acceptnonsurrogates_SHIFT = 8; +static const unsigned int utf8acceptnonsurrogates_BYTES = 1; +static const unsigned int utf8acceptnonsurrogates_LOSUB = 0x20202020; +static const unsigned int utf8acceptnonsurrogates_HIADD = 0x00000000; + +static const uint8 utf8acceptnonsurrogates[] = { +// state[0] 0x000000 Byte 1 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, 
X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +X__, X__, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 3, 3, + 4, 5, 5, 5, 6, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +// state[1] 0x000080 Byte 2 of 2 +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +// state[2] 0x000000 Byte 2 of 3 +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, 
X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +// state[3] 0x001000 Byte 2 of 3 +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, 
X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +// state[4] 0x000000 Byte 2 of 4 +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +// state[5] 0x040000 Byte 2 of 4 +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, 
X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +// state[6] 0x100000 Byte 2 of 4 +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +// state[7] 0x00d000 Byte 2 of 3 
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +// state[8] 0x00d800 Byte 3 of 3 +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, + +RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, +RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, 
RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, +RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, +RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, + +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, +}; + +// Remap base[0] = (del, add, string_offset) +static const RemapEntry utf8acceptnonsurrogates_remap_base[] = { +{0, 0, 0} }; + +// Remap string[0] +static const unsigned char utf8acceptnonsurrogates_remap_string[] = { +0 }; + +static const unsigned char utf8acceptnonsurrogates_fast[256] = { +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; + +static const UTF8ScanObj utf8acceptnonsurrogates_obj = { + utf8acceptnonsurrogates_STATE0, + utf8acceptnonsurrogates_STATE0_SIZE, + utf8acceptnonsurrogates_TOTAL_SIZE, + utf8acceptnonsurrogates_MAX_EXPAND_X4, + utf8acceptnonsurrogates_SHIFT, + utf8acceptnonsurrogates_BYTES, + utf8acceptnonsurrogates_LOSUB, + utf8acceptnonsurrogates_HIADD, + utf8acceptnonsurrogates, + 
utf8acceptnonsurrogates_remap_base, + utf8acceptnonsurrogates_remap_string, + utf8acceptnonsurrogates_fast +}; + + +#undef X__ +#undef RJ_ +#undef S1_ +#undef S2_ +#undef S3_ +#undef S21 +#undef S31 +#undef S32 +#undef T1_ +#undef T2_ +#undef S11 +#undef SP_ +#undef D__ +#undef RJA + +// Return true if current Tbl pointer is within state0 range +// Note that unsigned compare checks both ends of range simultaneously +static inline bool InStateZero(const UTF8ScanObj* st, const uint8* Tbl) { + const uint8* Tbl0 = &st->state_table[st->state0]; + return (static_cast<uint32>(Tbl - Tbl0) < st->state0_size); +} + +// Scan a UTF-8 string based on state table. +// Always scan complete UTF-8 characters +// Set number of bytes scanned. Return reason for exiting +int UTF8GenericScan(const UTF8ScanObj* st, + const char * str, + int str_length, + int* bytes_consumed) { + *bytes_consumed = 0; + if (str_length == 0) return kExitOK; + + int eshift = st->entry_shift; + const uint8* isrc = reinterpret_cast<const uint8*>(str); + const uint8* src = isrc; + const uint8* srclimit = isrc + str_length; + const uint8* srclimit8 = srclimit - 7; + const uint8* Tbl_0 = &st->state_table[st->state0]; + + DoAgain: + // Do state-table scan + int e = 0; + uint8 c; + + // Do fast for groups of 8 identity bytes. 
+ // This covers a lot of 7-bit ASCII ~8x faster then the 1-byte loop, + // including slowing slightly on cr/lf/ht + //---------------------------- + const uint8* Tbl2 = &st->fast_state[0]; + uint32 losub = st->losub; + uint32 hiadd = st->hiadd; + while (src < srclimit8) { + uint32 s0123 = (reinterpret_cast<const uint32 *>(src))[0]; + uint32 s4567 = (reinterpret_cast<const uint32 *>(src))[1]; + src += 8; + // This is a fast range check for all bytes in [lowsub..0x80-hiadd) + uint32 temp = (s0123 - losub) | (s0123 + hiadd) | + (s4567 - losub) | (s4567 + hiadd); + if ((temp & 0x80808080) != 0) { + // We typically end up here on cr/lf/ht; src was incremented + int e0123 = (Tbl2[src[-8]] | Tbl2[src[-7]]) | + (Tbl2[src[-6]] | Tbl2[src[-5]]); + if (e0123 != 0) { + src -= 8; + break; + } // Exit on Non-interchange + e0123 = (Tbl2[src[-4]] | Tbl2[src[-3]]) | + (Tbl2[src[-2]] | Tbl2[src[-1]]); + if (e0123 != 0) { + src -= 4; + break; + } // Exit on Non-interchange + // Else OK, go around again + } + } + //---------------------------- + + // Byte-at-a-time scan + //---------------------------- + const uint8* Tbl = Tbl_0; + while (src < srclimit) { + c = *src; + e = Tbl[c]; + src++; + if (e >= kExitIllegalStructure) {break;} + Tbl = &Tbl_0[e << eshift]; + } + //---------------------------- + + + // Exit posibilities: + // Some exit code, !state0, back up over last char + // Some exit code, state0, back up one byte exactly + // source consumed, !state0, back up over partial char + // source consumed, state0, exit OK + // For illegal byte in state0, avoid backup up over PREVIOUS char + // For truncated last char, back up to beginning of it + + if (e >= kExitIllegalStructure) { + // Back up over exactly one byte of rejected/illegal UTF-8 character + src--; + // Back up more if needed + if (!InStateZero(st, Tbl)) { + do { + src--; + } while ((src > isrc) && ((src[0] & 0xc0) == 0x80)); + } + } else if (!InStateZero(st, Tbl)) { + // Back up over truncated UTF-8 character + e = 
kExitIllegalStructure; + do { + src--; + } while ((src > isrc) && ((src[0] & 0xc0) == 0x80)); + } else { + // Normal termination, source fully consumed + e = kExitOK; + } + + if (e == kExitDoAgain) { + // Loop back up to the fast scan + goto DoAgain; + } + + *bytes_consumed = src - isrc; + return e; +} + +int UTF8GenericScanFastAscii(const UTF8ScanObj* st, + const char * str, + int str_length, + int* bytes_consumed) { + *bytes_consumed = 0; + if (str_length == 0) return kExitOK; + + const uint8* isrc = reinterpret_cast<const uint8*>(str); + const uint8* src = isrc; + const uint8* srclimit = isrc + str_length; + const uint8* srclimit8 = srclimit - 7; + int n; + int rest_consumed; + int exit_reason; + do { + while ((src < srclimit8) && + (((reinterpret_cast<const uint32*>(src)[0] | + reinterpret_cast<const uint32*>(src)[1]) & 0x80808080) == 0)) { + src += 8; + } + while ((src < srclimit) && (src[0] < 0x80)) { + src++; + } + // Run state table on the rest + n = src - isrc; + exit_reason = UTF8GenericScan(st, str + n, str_length - n, &rest_consumed); + src += rest_consumed; + } while ( exit_reason == kExitDoAgain ); + + *bytes_consumed = src - isrc; + return exit_reason; +} + +// Hack: On some compilers the static tables are initialized at startup. +// We can't use them until they are initialized. However, some Protocol +// Buffer parsing happens at static init time and may try to validate +// UTF-8 strings. Since UTF-8 validation is only used for debugging +// anyway, we simply always return success if initialization hasn't +// occurred yet. 
+namespace { + +bool module_initialized_ = false; + +struct InitDetector { + InitDetector() { + module_initialized_ = true; + } +}; +InitDetector init_detector; + +} // namespace + +bool IsStructurallyValidUTF8(const char* buf, int len) { + if (!module_initialized_) return true; + + int bytes_consumed = 0; + UTF8GenericScanFastAscii(&utf8acceptnonsurrogates_obj, + buf, len, &bytes_consumed); + return (bytes_consumed == len); +} + +} // namespace internal +} // namespace protobuf +} // namespace google diff --git a/src/google/protobuf/stubs/structurally_valid_unittest.cc b/src/google/protobuf/stubs/structurally_valid_unittest.cc new file mode 100644 index 00000000..22825516 --- /dev/null +++ b/src/google/protobuf/stubs/structurally_valid_unittest.cc @@ -0,0 +1,30 @@ +// Copyright 2008 Google Inc. All Rights Reserved. +// Author: xpeng@google.com (Peter Peng) + +#include <google/protobuf/stubs/common.h> +#include <gtest/gtest.h> + +namespace google { +namespace protobuf { +namespace internal { +namespace { + +TEST(StructurallyValidTest, ValidUTF8String) { + // On GCC, this string can be written as: + // "abcd 1234 - \u2014\u2013\u2212" + // MSVC seems to interpret \u differently. 
+ string valid_str("abcd 1234 - \342\200\224\342\200\223\342\210\222"); + EXPECT_TRUE(IsStructurallyValidUTF8(valid_str.data(), + valid_str.size())); +} + +TEST(StructurallyValidTest, InvalidUTF8String) { + string invalid_str("\xA0\xB0"); + EXPECT_FALSE(IsStructurallyValidUTF8(invalid_str.data(), + invalid_str.size())); +} + +} // namespace +} // namespace internal +} // namespace protobuf +} // namespace google diff --git a/src/google/protobuf/text_format.cc b/src/google/protobuf/text_format.cc index 16a9483e..b7101661 100644 --- a/src/google/protobuf/text_format.cc +++ b/src/google/protobuf/text_format.cc @@ -34,6 +34,7 @@ #include <float.h> #include <math.h> +#include <stdio.h> #include <stack> #include <limits> @@ -65,13 +66,23 @@ string Message::ShortDebugString() const { // DebugString() and munging the result. string result = DebugString(); - // Replace each contiguous range of whitespace (including newlines) with a - // single space. - for (int i = 0; i < result.size(); i++) { - int pos = i; - while (isspace(result[pos])) ++pos; - if (pos > i) result.replace(i, pos - i, " "); + // Replace each contiguous range of whitespace (including newlines, and + // starting with a newline) with a single space. + int out = 0; + for (int i = 0; i < result.size(); ++i) { + if (result[i] != '\n') { + result[out++] = result[i]; + } else { + while (i < result.size() && isspace(result[i])) ++i; + --i; + result[out++] = ' '; + } } + // Remove trailing space, if there is one. 
+ if (out > 0 && isspace(result[out - 1])) { + --out; + } + result.resize(out); return result; } @@ -103,14 +114,16 @@ class TextFormat::Parser::ParserImpl { FORBID_SINGULAR_OVERWRITES = 1, // an error is issued }; - ParserImpl(io::ZeroCopyInputStream* input_stream, + ParserImpl(const Descriptor* root_message_type, + io::ZeroCopyInputStream* input_stream, io::ErrorCollector* error_collector, SingularOverwritePolicy singular_overwrite_policy) : error_collector_(error_collector), tokenizer_error_collector_(this), tokenizer_(input_stream, &tokenizer_error_collector_), - root_message_type_(NULL), - singular_overwrite_policy_(singular_overwrite_policy) { + root_message_type_(root_message_type), + singular_overwrite_policy_(singular_overwrite_policy), + had_errors_(false) { // For backwards-compatibility with proto1, we need to allow the 'f' suffix // for floats. tokenizer_.set_allow_f_after_float(true); @@ -128,12 +141,10 @@ class TextFormat::Parser::ParserImpl { // false if an error occurs (an error will also be logged to // GOOGLE_LOG(ERROR)). bool Parse(Message* output) { - root_message_type_ = output->GetDescriptor(); - // Consume fields until we cannot do so anymore. 
while(true) { if (LookingAtType(io::Tokenizer::TYPE_END)) { - return true; + return !had_errors_; } DO(ConsumeField(output)); @@ -141,6 +152,7 @@ class TextFormat::Parser::ParserImpl { } void ReportError(int line, int col, const string& message) { + had_errors_ = true; if (error_collector_ == NULL) { if (line >= 0) { GOOGLE_LOG(ERROR) << "Error parsing text-format " @@ -571,6 +583,7 @@ class TextFormat::Parser::ParserImpl { io::Tokenizer tokenizer_; const Descriptor* root_message_type_; SingularOverwritePolicy singular_overwrite_policy_; + bool had_errors_; }; #undef DO @@ -699,7 +712,7 @@ TextFormat::Parser::~Parser() {} bool TextFormat::Parser::Parse(io::ZeroCopyInputStream* input, Message* output) { output->Clear(); - ParserImpl parser(input, error_collector_, + ParserImpl parser(output->GetDescriptor(), input, error_collector_, ParserImpl::FORBID_SINGULAR_OVERWRITES); return MergeUsingImpl(input, output, &parser); } @@ -712,7 +725,7 @@ bool TextFormat::Parser::ParseFromString(const string& input, bool TextFormat::Parser::Merge(io::ZeroCopyInputStream* input, Message* output) { - ParserImpl parser(input, error_collector_, + ParserImpl parser(output->GetDescriptor(), input, error_collector_, ParserImpl::ALLOW_SINGULAR_OVERWRITES); return MergeUsingImpl(input, output, &parser); } diff --git a/src/google/protobuf/text_format_unittest.cc b/src/google/protobuf/text_format_unittest.cc index 7610c384..3d2cda9d 100644 --- a/src/google/protobuf/text_format_unittest.cc +++ b/src/google/protobuf/text_format_unittest.cc @@ -64,12 +64,12 @@ inline bool IsNaN(double value) { // A basic string with different escapable characters for testing. const string kEscapeTestString = "\"A string with ' characters \n and \r newlines and \t tabs and \001 " - "slashes \\"; + "slashes \\ and multiple spaces"; // A representation of the above string with all the characters escaped. 
const string kEscapeTestStringEscaped = "\"\\\"A string with \\' characters \\n and \\r newlines " - "and \\t tabs and \\001 slashes \\\\\""; + "and \\t tabs and \\001 slashes \\\\ and multiple spaces\""; class TextFormatTest : public testing::Test { public: @@ -126,6 +126,18 @@ TEST_F(TextFormatExtensionsTest, Extensions) { EXPECT_EQ(proto_debug_string_, proto_.DebugString()); } +TEST_F(TextFormatTest, ShortDebugString) { + proto_.set_optional_int32(1); + proto_.set_optional_string("hello"); + proto_.mutable_optional_nested_message()->set_bb(2); + proto_.mutable_optional_foreign_message(); + + EXPECT_EQ("optional_int32: 1 optional_string: \"hello\" " + "optional_nested_message { bb: 2 } " + "optional_foreign_message { }", + proto_.ShortDebugString()); +} + TEST_F(TextFormatTest, StringEscape) { // Set the string value to test. proto_.set_optional_string(kEscapeTestString); @@ -140,6 +152,10 @@ TEST_F(TextFormatTest, StringEscape) { // Compare. EXPECT_EQ(correct_string, debug_string); + + string expected_short_debug_string = "optional_string: " + + kEscapeTestStringEscaped; + EXPECT_EQ(expected_short_debug_string, proto_.ShortDebugString()); } TEST_F(TextFormatTest, PrintUnknownFields) { @@ -736,6 +752,22 @@ TEST_F(TextFormatParserTest, PrintErrorsToStderr) { errors[0]); } +TEST_F(TextFormatParserTest, FailsOnTokenizationError) { + vector<string> errors; + + { + ScopedMemoryLog log; + unittest::TestAllTypes proto; + EXPECT_FALSE(TextFormat::ParseFromString("\020", &proto)); + errors = log.GetMessages(ERROR); + } + + ASSERT_EQ(1, errors.size()); + EXPECT_EQ("Error parsing text-format protobuf_unittest.TestAllTypes: " + "1:1: Invalid control characters encountered in text.", + errors[0]); +} + class TextFormatMessageSetTest : public testing::Test { protected: diff --git a/src/google/protobuf/unittest.proto b/src/google/protobuf/unittest.proto index 56346b95..0aa66b9f 100644 --- a/src/google/protobuf/unittest.proto +++ b/src/google/protobuf/unittest.proto @@ -452,6 
+452,15 @@ message TestExtremeDefaultValues { optional string utf8_string = 6 [default = "\341\210\264"]; } +// Test String and Bytes: string is for valid UTF-8 strings +message OneString { + optional string data = 1; +} + +message OneBytes { + optional bytes data = 1; +} + // Test that RPC services work. message FooRequest {} message FooResponse {} diff --git a/src/google/protobuf/unittest_optimize_for.proto b/src/google/protobuf/unittest_optimize_for.proto index ac3f2f21..feecbef8 100644 --- a/src/google/protobuf/unittest_optimize_for.proto +++ b/src/google/protobuf/unittest_optimize_for.proto @@ -48,6 +48,7 @@ message TestOptimizedForSize { extend TestOptimizedForSize { optional int32 test_extension = 1234; + optional TestRequiredOptimizedForSize test_extension2 = 1235; } } diff --git a/src/google/protobuf/unknown_field_set.cc b/src/google/protobuf/unknown_field_set.cc index b170d41d..6a9be5a4 100644 --- a/src/google/protobuf/unknown_field_set.cc +++ b/src/google/protobuf/unknown_field_set.cc @@ -130,6 +130,30 @@ UnknownField* UnknownFieldSet::AddField(int number) { return field; } +int UnknownFieldSet::SpaceUsedExcludingSelf() const { + int total_size = 0; + if (internal_ != NULL) { + total_size += sizeof(*internal_); + total_size += internal_->active_fields_.capacity() * + sizeof(Internal::FieldVector::value_type); + total_size += internal_->fields_.size() * + sizeof(Internal::FieldMap::value_type); + + // Account for the UnknownField objects themselves. 
+ for (Internal::FieldMap::const_iterator it = internal_->fields_.begin(), + end = internal_->fields_.end(); + it != end; + ++it) { + total_size += it->second->SpaceUsed(); + } + } + return total_size; +} + +int UnknownFieldSet::SpaceUsed() const { + return sizeof(*this) + SpaceUsedExcludingSelf(); +} + UnknownField::UnknownField(int number) : number_(number), index_(-1) { @@ -154,5 +178,15 @@ void UnknownField::MergeFrom(const UnknownField& other) { group_ .MergeFrom(other.group_ ); } +int UnknownField::SpaceUsed() const { + int total_size = sizeof(*this); + total_size += varint_.SpaceUsedExcludingSelf(); + total_size += fixed32_.SpaceUsedExcludingSelf(); + total_size += fixed64_.SpaceUsedExcludingSelf(); + total_size += length_delimited_.SpaceUsedExcludingSelf(); + total_size += group_.SpaceUsedExcludingSelf(); + return total_size; +} + } // namespace protobuf } // namespace google diff --git a/src/google/protobuf/unknown_field_set.h b/src/google/protobuf/unknown_field_set.h index e65c6a4c..55eec6e2 100644 --- a/src/google/protobuf/unknown_field_set.h +++ b/src/google/protobuf/unknown_field_set.h @@ -75,6 +75,9 @@ class LIBPROTOBUF_EXPORT UnknownFieldSet { // Merge the contents of some other UnknownFieldSet with this one. void MergeFrom(const UnknownFieldSet& other); + // Swaps the contents of some other UnknownFieldSet with this one. + inline void Swap(UnknownFieldSet* x); + // Returns the number of fields present in the UnknownFieldSet. inline int field_count() const; // Get a field in the set, where 0 <= index < field_count(). The fields @@ -102,6 +105,13 @@ class LIBPROTOBUF_EXPORT UnknownFieldSet { return ParseFromArray(data.data(), data.size()); } + // Computes (an estimate of) the total number of bytes currently used for + // storing the unknown fields in memory. Does NOT include + // sizeof(*this) in the calculation. + int SpaceUsedExcludingSelf() const; + // Version of SpaceUsed() including sizeof(*this). 
+ int SpaceUsed() const; + private: // "Active" fields are ones which have been added since the last time Clear() // was called. Inactive fields are objects we are keeping around incase @@ -114,10 +124,12 @@ class LIBPROTOBUF_EXPORT UnknownFieldSet { // the same field number they were used for originally because this makes // it more likely that the previously-allocated memory will have the right // layout. - map<int, UnknownField*> fields_; + typedef map<int, UnknownField*> FieldMap; + FieldMap fields_; // Contains the fields from fields_ that are currently active. - vector<UnknownField*> active_fields_; + typedef vector<UnknownField*> FieldVector; + FieldVector active_fields_; }; // We want an UnknownFieldSet to use no more space than a single pointer @@ -203,6 +215,10 @@ class LIBPROTOBUF_EXPORT UnknownField { inline RepeatedPtrField<string >* mutable_length_delimited(); inline RepeatedPtrField<UnknownFieldSet>* mutable_group (); + // Returns (an estimate of) the total number of bytes used to represent the + // unknown field. + int SpaceUsed() const; + private: friend class UnknownFieldSet; UnknownField(int number); @@ -226,6 +242,10 @@ inline bool UnknownFieldSet::empty() const { return internal_ == NULL || internal_->active_fields_.empty(); } +inline void UnknownFieldSet::Swap(UnknownFieldSet* x) { + std::swap(internal_, x->internal_); +} + inline int UnknownFieldSet::field_count() const { return (internal_ == NULL) ? 
0 : internal_->active_fields_.size(); } diff --git a/src/google/protobuf/unknown_field_set_unittest.cc b/src/google/protobuf/unknown_field_set_unittest.cc index c358e1f1..c7e78b23 100644 --- a/src/google/protobuf/unknown_field_set_unittest.cc +++ b/src/google/protobuf/unknown_field_set_unittest.cc @@ -222,6 +222,30 @@ TEST_F(UnknownFieldSetTest, CopyFrom) { EXPECT_EQ(empty_message_.DebugString(), message.DebugString()); } +TEST_F(UnknownFieldSetTest, Swap) { + unittest::TestEmptyMessage other_message; + ASSERT_TRUE(other_message.ParseFromString(GetBizarroData())); + + EXPECT_GT(empty_message_.unknown_fields().field_count(), 0); + EXPECT_GT(other_message.unknown_fields().field_count(), 0); + const string debug_string = empty_message_.DebugString(); + const string other_debug_string = other_message.DebugString(); + EXPECT_NE(debug_string, other_debug_string); + + empty_message_.Swap(&other_message); + EXPECT_EQ(debug_string, other_message.DebugString()); + EXPECT_EQ(other_debug_string, empty_message_.DebugString()); +} + +TEST_F(UnknownFieldSetTest, SwapWithSelf) { + const string debug_string = empty_message_.DebugString(); + EXPECT_GT(empty_message_.unknown_fields().field_count(), 0); + + empty_message_.Swap(&empty_message_); + EXPECT_GT(empty_message_.unknown_fields().field_count(), 0); + EXPECT_EQ(debug_string, empty_message_.DebugString()); +} + TEST_F(UnknownFieldSetTest, MergeFrom) { unittest::TestEmptyMessage source, destination; @@ -426,6 +450,72 @@ TEST_F(UnknownFieldSetTest, UnknownEnumValue) { } } +TEST_F(UnknownFieldSetTest, SpaceUsedExcludingSelf) { + { + // Make sure an unknown field set has zero space used until a field is + // actually added. 
+ unittest::TestEmptyMessage empty_message; + const int empty_message_size = empty_message.SpaceUsed(); + UnknownFieldSet* unknown_fields = empty_message.mutable_unknown_fields(); + EXPECT_EQ(empty_message_size, empty_message.SpaceUsed()); + unknown_fields->AddField(1)->add_varint(0); + EXPECT_LT(empty_message_size, empty_message.SpaceUsed()); + } + { + // Test varints. + UnknownFieldSet unknown_fields; + UnknownField* field = unknown_fields.AddField(1); + const int base_size = unknown_fields.SpaceUsedExcludingSelf(); + for (int i = 0; i < 16; ++i) { + field->add_varint(i); + } + // Should just defer computation to the RepeatedField. + int expected_size = base_size + field->varint().SpaceUsedExcludingSelf(); + EXPECT_EQ(expected_size, unknown_fields.SpaceUsedExcludingSelf()); + } + { + // Test fixed32s. + UnknownFieldSet unknown_fields; + UnknownField* field = unknown_fields.AddField(1); + const int base_size = unknown_fields.SpaceUsedExcludingSelf(); + for (int i = 0; i < 16; ++i) { + field->add_fixed32(i); + } + int expected_size = base_size + field->fixed32().SpaceUsedExcludingSelf(); + EXPECT_EQ(expected_size, unknown_fields.SpaceUsedExcludingSelf()); + } + { + // Test fixed64s. + UnknownFieldSet unknown_fields; + UnknownField* field = unknown_fields.AddField(1); + const int base_size = unknown_fields.SpaceUsedExcludingSelf(); + for (int i = 0; i < 16; ++i) { + field->add_fixed64(i); + } + int expected_size = base_size + field->fixed64().SpaceUsedExcludingSelf(); + EXPECT_EQ(expected_size, unknown_fields.SpaceUsedExcludingSelf()); + } + { + // Test length-delimited types. 
+ UnknownFieldSet unknown_fields; + UnknownField* field = unknown_fields.AddField(1); + const int base_size = unknown_fields.SpaceUsedExcludingSelf(); + for (int i = 0; i < 16; ++i) { + field->add_length_delimited()->assign("my length delimited string"); + } + int expected_size = base_size + + field->length_delimited().SpaceUsedExcludingSelf(); + EXPECT_EQ(expected_size, unknown_fields.SpaceUsedExcludingSelf()); + } +} + +TEST_F(UnknownFieldSetTest, SpaceUsed) { + UnknownFieldSet unknown_fields; + const int expected_size = sizeof(unknown_fields) + + unknown_fields.SpaceUsedExcludingSelf(); + EXPECT_EQ(expected_size, unknown_fields.SpaceUsed()); +} + } // namespace } // namespace protobuf } // namespace google diff --git a/src/google/protobuf/wire_format.cc b/src/google/protobuf/wire_format.cc index 219d1f27..99ea619b 100644 --- a/src/google/protobuf/wire_format.cc +++ b/src/google/protobuf/wire_format.cc @@ -648,8 +648,7 @@ bool WireFormat::SerializeFieldWithCachedSizes( // Handle strings separately so that we can get string references // instead of copying. - case FieldDescriptor::TYPE_STRING: - case FieldDescriptor::TYPE_BYTES: { + case FieldDescriptor::TYPE_STRING: { string scratch; const string& value = field->is_repeated() ? message_reflection->GetRepeatedStringReference( @@ -658,6 +657,16 @@ bool WireFormat::SerializeFieldWithCachedSizes( if (!WriteString(field->number(), value, output)) return false; break; } + + case FieldDescriptor::TYPE_BYTES: { + string scratch; + const string& value = field->is_repeated() ? 
+ message_reflection->GetRepeatedStringReference( + message, field, j, &scratch) : + message_reflection->GetStringReference(message, field, &scratch); + if (!WriteBytes(field->number(), value, output)) return false; + break; + } } } diff --git a/src/google/protobuf/wire_format_inl.h b/src/google/protobuf/wire_format_inl.h index 6545ee80..539d8c67 100644 --- a/src/google/protobuf/wire_format_inl.h +++ b/src/google/protobuf/wire_format_inl.h @@ -36,10 +36,17 @@ #define GOOGLE_PROTOBUF_WIRE_FORMAT_INL_H__ #include <string> +#include <google/protobuf/stubs/common.h> #include <google/protobuf/wire_format.h> #include <google/protobuf/io/coded_stream.h> +// Do UTF-8 validation on string type in Debug build only +#ifndef NDEBUG +#define GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED +#endif + + namespace google { namespace protobuf { namespace internal { @@ -122,12 +129,18 @@ inline bool WireFormat::ReadEnum(io::CodedInputStream* input, int* value) { } inline bool WireFormat::ReadString(io::CodedInputStream* input, string* value) { - // WARNING: In wire_format.cc, both strings and bytes are handled by - // ReadString() to avoid code duplication. If the implementations become - // different, you will need to update that usage. + // String is for UTF-8 text only uint32 length; if (!input->ReadVarint32(&length)) return false; - return input->ReadString(value, length); + if (!input->ReadString(value, length)) return false; +#ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED + if (!IsStructurallyValidUTF8(value->data(), length)) { + GOOGLE_LOG(ERROR) << "Encountered string containing invalid UTF-8 data while " + "parsing protocol buffer. 
Strings must contain only UTF-8; " + "use the 'bytes' type for raw bytes."; + } +#endif // GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED + return true; } inline bool WireFormat::ReadBytes(io::CodedInputStream* input, string* value) { uint32 length; @@ -270,9 +283,14 @@ inline bool WireFormat::WriteEnum(int field_number, int value, inline bool WireFormat::WriteString(int field_number, const string& value, io::CodedOutputStream* output) { - // WARNING: In wire_format.cc, both strings and bytes are handled by - // WriteString() to avoid code duplication. If the implementations become - // different, you will need to update that usage. + // String is for UTF-8 text only +#ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED + if (!IsStructurallyValidUTF8(value.data(), value.size())) { + GOOGLE_LOG(ERROR) << "Encountered string containing invalid UTF-8 data while " + "serializing protocol buffer. Strings must contain only UTF-8; " + "use the 'bytes' type for raw bytes."; + } +#endif // GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED return WriteTag(field_number, WIRETYPE_LENGTH_DELIMITED, output) && output->WriteVarint32(value.size()) && output->WriteString(value); diff --git a/src/google/protobuf/wire_format_unittest.cc b/src/google/protobuf/wire_format_unittest.cc index 6e3d4745..43dccd1a 100644 --- a/src/google/protobuf/wire_format_unittest.cc +++ b/src/google/protobuf/wire_format_unittest.cc @@ -199,6 +199,30 @@ TEST(WireFormatTest, SerializeFieldsAndExtensions) { TestUtil::ExpectAllFieldsAndExtensionsInOrder(generated_data); } +TEST(WireFormatTest, ParseMultipleExtensionRanges) { + // Make sure we can parse a message that contains multiple extensions ranges. + unittest::TestFieldOrderings source; + string data; + + TestUtil::SetAllFieldsAndExtensions(&source); + source.SerializeToString(&data); + + { + unittest::TestFieldOrderings dest; + EXPECT_TRUE(dest.ParseFromString(data)); + EXPECT_EQ(source.DebugString(), dest.DebugString()); + } + + // Also test using reflection-based parsing. 
+ { + unittest::TestFieldOrderings dest; + io::ArrayInputStream raw_input(data.data(), data.size()); + io::CodedInputStream coded_input(&raw_input); + EXPECT_TRUE(WireFormat::ParseAndMergePartial(&coded_input, &dest)); + EXPECT_EQ(source.DebugString(), dest.DebugString()); + } +} + const int kUnknownTypeId = 1550055; TEST(WireFormatTest, SerializeMessageSet) { @@ -421,7 +445,7 @@ class WireFormatInvalidInputTest : public testing::Test { io::StringOutputStream raw_output(&result); io::CodedOutputStream output(&raw_output); - EXPECT_TRUE(WireFormat::WriteString( + EXPECT_TRUE(WireFormat::WriteBytes( field->number(), string(bytes, size), &output)); } @@ -541,6 +565,130 @@ TEST_F(WireFormatInvalidInputTest, InvalidStringInUnknownGroup) { EXPECT_FALSE(WireFormat::SkipMessage(&coded_input, &unknown_fields)); } +// Test differences between string and bytes. +// Value of a string type must be valid UTF-8 string. When UTF-8 +// validation is enabled (GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED): +// WriteInvalidUTF8String: see error message. +// ReadInvalidUTF8String: see error message. +// WriteValidUTF8String: fine. +// ReadValidUTF8String: fine. +// WriteAnyBytes: fine. +// ReadAnyBytes: fine. 
+const char * kInvalidUTF8String = "Invalid UTF-8: \xA0\xB0\xC0\xD0"; +const char * kValidUTF8String = "Valid UTF-8: \x01\x02\u8C37\u6B4C"; + +template<typename T> +bool WriteMessage(const char *value, T *message, string *wire_buffer) { + message->set_data(value); + wire_buffer->clear(); + message->AppendToString(wire_buffer); + return (wire_buffer->size() > 0); +} + +template<typename T> +bool ReadMessage(const string &wire_buffer, T *message) { + return message->ParseFromArray(wire_buffer.data(), wire_buffer.size()); +} + +TEST(Utf8ValidationTest, WriteInvalidUTF8String) { + string wire_buffer; + protobuf_unittest::OneString input; + vector<string> errors; + { + ScopedMemoryLog log; + WriteMessage(kInvalidUTF8String, &input, &wire_buffer); + errors = log.GetMessages(ERROR); + } +#ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED + ASSERT_EQ(1, errors.size()); + EXPECT_EQ("Encountered string containing invalid UTF-8 data while " + "serializing protocol buffer. Strings must contain only UTF-8; " + "use the 'bytes' type for raw bytes.", + errors[0]); + +#else + ASSERT_EQ(0, errors.size()); +#endif // GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED +} + +TEST(Utf8ValidationTest, ReadInvalidUTF8String) { + string wire_buffer; + protobuf_unittest::OneString input; + WriteMessage(kInvalidUTF8String, &input, &wire_buffer); + protobuf_unittest::OneString output; + vector<string> errors; + { + ScopedMemoryLog log; + ReadMessage(wire_buffer, &output); + errors = log.GetMessages(ERROR); + } +#ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED + ASSERT_EQ(1, errors.size()); + EXPECT_EQ("Encountered string containing invalid UTF-8 data while " + "parsing protocol buffer. 
Strings must contain only UTF-8; " + "use the 'bytes' type for raw bytes.", + errors[0]); + +#else + ASSERT_EQ(0, errors.size()); +#endif // GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED +} + +TEST(Utf8ValidationTest, WriteValidUTF8String) { + string wire_buffer; + protobuf_unittest::OneString input; + vector<string> errors; + { + ScopedMemoryLog log; + WriteMessage(kValidUTF8String, &input, &wire_buffer); + errors = log.GetMessages(ERROR); + } + ASSERT_EQ(0, errors.size()); +} + +TEST(Utf8ValidationTest, ReadValidUTF8String) { + string wire_buffer; + protobuf_unittest::OneString input; + WriteMessage(kValidUTF8String, &input, &wire_buffer); + protobuf_unittest::OneString output; + vector<string> errors; + { + ScopedMemoryLog log; + ReadMessage(wire_buffer, &output); + errors = log.GetMessages(ERROR); + } + ASSERT_EQ(0, errors.size()); + EXPECT_EQ(input.data(), output.data()); +} + +// Bytes: anything can pass as bytes, use invalid UTF-8 string to test +TEST(Utf8ValidationTest, WriteArbitraryBytes) { + string wire_buffer; + protobuf_unittest::OneBytes input; + vector<string> errors; + { + ScopedMemoryLog log; + WriteMessage(kInvalidUTF8String, &input, &wire_buffer); + errors = log.GetMessages(ERROR); + } + ASSERT_EQ(0, errors.size()); +} + +TEST(Utf8ValidationTest, ReadArbitraryBytes) { + string wire_buffer; + protobuf_unittest::OneBytes input; + WriteMessage(kInvalidUTF8String, &input, &wire_buffer); + protobuf_unittest::OneBytes output; + vector<string> errors; + { + ScopedMemoryLog log; + ReadMessage(wire_buffer, &output); + errors = log.GetMessages(ERROR); + } + ASSERT_EQ(0, errors.size()); + EXPECT_EQ(input.data(), output.data()); +} + } // namespace } // namespace internal } // namespace protobuf diff --git a/vsprojects/libprotobuf.vcproj b/vsprojects/libprotobuf.vcproj index 6edd8453..1da0b8ae 100644 --- a/vsprojects/libprotobuf.vcproj +++ b/vsprojects/libprotobuf.vcproj @@ -356,6 +356,10 @@ > </File> <File + 
RelativePath="..\src\google\protobuf\stubs\structurally_valid.cc" + > + </File> + <File RelativePath="..\src\google\protobuf\text_format.cc" > </File> diff --git a/vsprojects/tests.vcproj b/vsprojects/tests.vcproj index 41480163..e50932b6 100644 --- a/vsprojects/tests.vcproj +++ b/vsprojects/tests.vcproj @@ -287,6 +287,10 @@ > </File> <File + RelativePath="..\src\google\protobuf\stubs\structurally_valid_unittest.cc" + > + </File> + <File RelativePath="..\src\google\protobuf\io\coded_stream_unittest.cc" > </File> |