diff options
author | kenton@google.com <kenton@google.com@630680e5-0e50-0410-840e-4b1c322b438d> | 2008-11-21 00:06:27 +0000 |
---|---|---|
committer | kenton@google.com <kenton@google.com@630680e5-0e50-0410-840e-4b1c322b438d> | 2008-11-21 00:06:27 +0000 |
commit | 26bd9eee6ee6d116e1cc0dedeb660cd69d7aac45 (patch) | |
tree | d35cca89e0da44f136090a554ff9abc93a794fa8 /src/google/protobuf/wire_format_inl.h | |
parent | a2a32c20434807e9966e3f48375f9419134d1b55 (diff) | |
download | protobuf-26bd9eee6ee6d116e1cc0dedeb660cd69d7aac45.tar.gz protobuf-26bd9eee6ee6d116e1cc0dedeb660cd69d7aac45.tar.bz2 protobuf-26bd9eee6ee6d116e1cc0dedeb660cd69d7aac45.zip |
Integrate changes from internal code.
protoc
* Enum values may now have custom options, using syntax similar to field
options.
* Fixed bug where .proto files which use custom options but don't actually
define them (i.e. they import another .proto file defining the options)
had to explicitly import descriptor.proto.
* Adjacent string literals in .proto files will now be concatenated, like in
C.
C++
* Generated message classes now have a Swap() method which efficiently swaps
the contents of two objects.
* All message classes now have a SpaceUsed() method which returns an estimate
of the number of bytes of allocated memory currently owned by the object.
This is particularly useful when you are reusing a single message object
to improve performance but want to make sure it doesn't bloat up too large.
* New method Message::SerializeAsString() returns a string containing the
serialized data. May be more convenient than calling
SerializeToString(string*).
* In debug mode, log error messages when string-type fields are found to
contain bytes that are not valid UTF-8.
* Fixed bug where a message with multiple extension ranges couldn't parse
extensions.
* Fixed bug where MergeFrom(const Message&) didn't do anything if invoked on
a message that contained no fields (but possibly contained extensions).
* Fixed ShortDebugString() to not be O(n^2). Durr.
* Fixed crash in TextFormat parsing if the first token in the input caused a
tokenization error.
Java
* New overload of mergeFrom() which parses a slice of a byte array instead
of the whole thing.
* New method ByteString.asReadOnlyByteBuffer() returns a read-only java.nio.ByteBuffer whose content equals that of the ByteString.
* Improved performance of isInitialized() when optimizing for code size.
Python
* Corrected ListFields() signature in Message base class to match what
subclasses actually implement.
* Some minor refactoring.
Diffstat (limited to 'src/google/protobuf/wire_format_inl.h')
-rw-r--r-- | src/google/protobuf/wire_format_inl.h | 32 |
1 file changed, 25 insertions, 7 deletions
diff --git a/src/google/protobuf/wire_format_inl.h b/src/google/protobuf/wire_format_inl.h index 6545ee80..539d8c67 100644 --- a/src/google/protobuf/wire_format_inl.h +++ b/src/google/protobuf/wire_format_inl.h @@ -36,10 +36,17 @@ #define GOOGLE_PROTOBUF_WIRE_FORMAT_INL_H__ #include <string> +#include <google/protobuf/stubs/common.h> #include <google/protobuf/wire_format.h> #include <google/protobuf/io/coded_stream.h> +// Do UTF-8 validation on string type in Debug build only +#ifndef NDEBUG +#define GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED +#endif + + namespace google { namespace protobuf { namespace internal { @@ -122,12 +129,18 @@ inline bool WireFormat::ReadEnum(io::CodedInputStream* input, int* value) { } inline bool WireFormat::ReadString(io::CodedInputStream* input, string* value) { - // WARNING: In wire_format.cc, both strings and bytes are handled by - // ReadString() to avoid code duplication. If the implementations become - // different, you will need to update that usage. + // String is for UTF-8 text only uint32 length; if (!input->ReadVarint32(&length)) return false; - return input->ReadString(value, length); + if (!input->ReadString(value, length)) return false; +#ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED + if (!IsStructurallyValidUTF8(value->data(), length)) { + GOOGLE_LOG(ERROR) << "Encountered string containing invalid UTF-8 data while " + "parsing protocol buffer. Strings must contain only UTF-8; " + "use the 'bytes' type for raw bytes."; + } +#endif // GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED + return true; } inline bool WireFormat::ReadBytes(io::CodedInputStream* input, string* value) { uint32 length; @@ -270,9 +283,14 @@ inline bool WireFormat::WriteEnum(int field_number, int value, inline bool WireFormat::WriteString(int field_number, const string& value, io::CodedOutputStream* output) { - // WARNING: In wire_format.cc, both strings and bytes are handled by - // WriteString() to avoid code duplication. 
If the implementations become - // different, you will need to update that usage. + // String is for UTF-8 text only +#ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED + if (!IsStructurallyValidUTF8(value.data(), value.size())) { + GOOGLE_LOG(ERROR) << "Encountered string containing invalid UTF-8 data while " + "serializing protocol buffer. Strings must contain only UTF-8; " + "use the 'bytes' type for raw bytes."; + } +#endif // GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED return WriteTag(field_number, WIRETYPE_LENGTH_DELIMITED, output) && output->WriteVarint32(value.size()) && output->WriteString(value); |