Integrate changes from internal code.

protoc
* Enum values may now have custom options, using syntax similar to field options.
* Fixed bug where .proto files which use custom options but don't actually define them (i.e. they import another .proto file defining the options) had to explicitly import descriptor.proto.
* Adjacent string literals in .proto files will now be concatenated, like in C.

C++
* Generated message classes now have a Swap() method which efficiently swaps the contents of two objects.
* All message classes now have a SpaceUsed() method which returns an estimate of the number of bytes of allocated memory currently owned by the object. This is particularly useful when you are reusing a single message object to improve performance but want to make sure it doesn't bloat up too large.
* New method Message::SerializeAsString() returns a string containing the serialized data. May be more convenient than calling SerializeToString(string*).
* In debug mode, log error messages when string-type fields are found to contain bytes that are not valid UTF-8.
* Fixed bug where a message with multiple extension ranges couldn't parse extensions.
* Fixed bug where MergeFrom(const Message&) didn't do anything if invoked on a message that contained no fields (but possibly contained extensions).
* Fixed ShortDebugString() to not be O(n^2). Durr.
* Fixed crash in TextFormat parsing if the first token in the input caused a tokenization error.

Java
* New overload of mergeFrom() which parses a slice of a byte array instead of the whole thing.
* New method ByteString.asReadOnlyByteBuffer() does what it sounds like.
* Improved performance of isInitialized() when optimizing for code size.

Python
* Corrected ListFields() signature in Message base class to match what subclasses actually implement.
* Some minor refactoring.
author: kenton@google.com <kenton@google.com@630680e5-0e50-0410-840e-4b1c322b438d> 2008-11-21 00:06:27 +0000
committer: kenton@google.com <kenton@google.com@630680e5-0e50-0410-840e-4b1c322b438d> 2008-11-21 00:06:27 +0000
commit: 26bd9eee6ee6d116e1cc0dedeb660cd69d7aac45 (patch)
tree: d35cca89e0da44f136090a554ff9abc93a794fa8 /src/google/protobuf/wire_format_inl.h
parent: a2a32c20434807e9966e3f48375f9419134d1b55 (diff)
download: protobuf-26bd9eee6ee6d116e1cc0dedeb660cd69d7aac45.tar.gz
protobuf-26bd9eee6ee6d116e1cc0dedeb660cd69d7aac45.tar.bz2
protobuf-26bd9eee6ee6d116e1cc0dedeb660cd69d7aac45.zip
1 files changed, 25 insertions, 7 deletions
diff --git a/src/google/protobuf/wire_format_inl.h b/src/google/protobuf/wire_format_inl.h
index 6545ee80..539d8c67 100644
--- a/src/google/protobuf/wire_format_inl.h
+++ b/src/google/protobuf/wire_format_inl.h
@@ -36,10 +36,17 @@
 #define GOOGLE_PROTOBUF_WIRE_FORMAT_INL_H__
 
 #include <string>
+#include <google/protobuf/stubs/common.h>
 #include <google/protobuf/wire_format.h>
 #include <google/protobuf/io/coded_stream.h>
 
 
+// Do UTF-8 validation on string type in Debug build only
+#ifndef NDEBUG
+#define GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
+#endif
+
+
 namespace google {
 namespace protobuf {
 namespace internal {
@@ -122,12 +129,18 @@ inline bool WireFormat::ReadEnum(io::CodedInputStream* input, int* value) {
 }
 
 inline bool WireFormat::ReadString(io::CodedInputStream* input, string* value) {
-  // WARNING:  In wire_format.cc, both strings and bytes are handled by
-  //   ReadString() to avoid code duplication.  If the implementations become
-  //   different, you will need to update that usage.
+  // String is for UTF-8 text only
   uint32 length;
   if (!input->ReadVarint32(&length)) return false;
-  return input->ReadString(value, length);
+  if (!input->ReadString(value, length)) return false;
+#ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
+  if (!IsStructurallyValidUTF8(value->data(), length)) {
+    GOOGLE_LOG(ERROR) << "Encountered string containing invalid UTF-8 data while "
+               "parsing protocol buffer. Strings must contain only UTF-8; "
+               "use the 'bytes' type for raw bytes.";
+  }
+#endif  // GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
+  return true;
 }
 inline bool WireFormat::ReadBytes(io::CodedInputStream* input, string* value) {
   uint32 length;
@@ -270,9 +283,14 @@ inline bool WireFormat::WriteEnum(int field_number, int value,
 
 inline bool WireFormat::WriteString(int field_number, const string& value,
                                     io::CodedOutputStream* output) {
-  // WARNING:  In wire_format.cc, both strings and bytes are handled by
-  //   WriteString() to avoid code duplication.  If the implementations become
-  //   different, you will need to update that usage.
+  // String is for UTF-8 text only
+#ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
+  if (!IsStructurallyValidUTF8(value.data(), value.size())) {
+    GOOGLE_LOG(ERROR) << "Encountered string containing invalid UTF-8 data while "
+               "serializing protocol buffer. Strings must contain only UTF-8; "
+               "use the 'bytes' type for raw bytes.";
+  }
+#endif  // GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
   return WriteTag(field_number, WIRETYPE_LENGTH_DELIMITED, output) &&
          output->WriteVarint32(value.size()) &&
          output->WriteString(value);
author	kenton@google.com <kenton@google.com@630680e5-0e50-0410-840e-4b1c322b438d>	2008-11-21 00:06:27 +0000
committer	kenton@google.com <kenton@google.com@630680e5-0e50-0410-840e-4b1c322b438d>	2008-11-21 00:06:27 +0000
commit	26bd9eee6ee6d116e1cc0dedeb660cd69d7aac45 (patch)
tree	d35cca89e0da44f136090a554ff9abc93a794fa8 /src/google/protobuf/wire_format_inl.h
parent	a2a32c20434807e9966e3f48375f9419134d1b55 (diff)
download	protobuf-26bd9eee6ee6d116e1cc0dedeb660cd69d7aac45.tar.gz protobuf-26bd9eee6ee6d116e1cc0dedeb660cd69d7aac45.tar.bz2 protobuf-26bd9eee6ee6d116e1cc0dedeb660cd69d7aac45.zip