aboutsummaryrefslogtreecommitdiff
path: root/src/google/protobuf/wire_format_inl.h
diff options
context:
space:
mode:
author kenton@google.com <kenton@google.com@630680e5-0e50-0410-840e-4b1c322b438d> 2008-11-21 00:06:27 +0000
committer kenton@google.com <kenton@google.com@630680e5-0e50-0410-840e-4b1c322b438d> 2008-11-21 00:06:27 +0000
commit 26bd9eee6ee6d116e1cc0dedeb660cd69d7aac45 (patch)
tree d35cca89e0da44f136090a554ff9abc93a794fa8 /src/google/protobuf/wire_format_inl.h
parent a2a32c20434807e9966e3f48375f9419134d1b55 (diff)
download protobuf-26bd9eee6ee6d116e1cc0dedeb660cd69d7aac45.tar.gz
protobuf-26bd9eee6ee6d116e1cc0dedeb660cd69d7aac45.tar.bz2
protobuf-26bd9eee6ee6d116e1cc0dedeb660cd69d7aac45.zip
Integrate changes from internal code.

protoc
* Enum values may now have custom options, using syntax similar to field options.
* Fixed bug where .proto files which use custom options but don't actually define them (i.e. they import another .proto file defining the options) had to explicitly import descriptor.proto.
* Adjacent string literals in .proto files will now be concatenated, like in C.

C++
* Generated message classes now have a Swap() method which efficiently swaps the contents of two objects.
* All message classes now have a SpaceUsed() method which returns an estimate of the number of bytes of allocated memory currently owned by the object. This is particularly useful when you are reusing a single message object to improve performance but want to make sure it doesn't bloat up too large.
* New method Message::SerializeAsString() returns a string containing the serialized data. May be more convenient than calling SerializeToString(string*).
* In debug mode, log error messages when string-type fields are found to contain bytes that are not valid UTF-8.
* Fixed bug where a message with multiple extension ranges couldn't parse extensions.
* Fixed bug where MergeFrom(const Message&) didn't do anything if invoked on a message that contained no fields (but possibly contained extensions).
* Fixed ShortDebugString() to not be O(n^2). Durr.
* Fixed crash in TextFormat parsing if the first token in the input caused a tokenization error.

Java
* New overload of mergeFrom() which parses a slice of a byte array instead of the whole thing.
* New method ByteString.asReadOnlyByteBuffer() does what it sounds like.
* Improved performance of isInitialized() when optimizing for code size.

Python
* Corrected ListFields() signature in Message base class to match what subclasses actually implement.
* Some minor refactoring.

Diffstat (limited to 'src/google/protobuf/wire_format_inl.h')
-rw-r--r-- src/google/protobuf/wire_format_inl.h | 32
1 files changed, 25 insertions, 7 deletions
diff --git a/src/google/protobuf/wire_format_inl.h b/src/google/protobuf/wire_format_inl.h
index 6545ee80..539d8c67 100644
--- a/src/google/protobuf/wire_format_inl.h
+++ b/src/google/protobuf/wire_format_inl.h
@@ -36,10 +36,17 @@
#define GOOGLE_PROTOBUF_WIRE_FORMAT_INL_H__
#include <string>
+#include <google/protobuf/stubs/common.h>
#include <google/protobuf/wire_format.h>
#include <google/protobuf/io/coded_stream.h>
+// Do UTF-8 validation on string type in Debug build only
+#ifndef NDEBUG
+#define GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
+#endif
+
+
namespace google {
namespace protobuf {
namespace internal {
@@ -122,12 +129,18 @@ inline bool WireFormat::ReadEnum(io::CodedInputStream* input, int* value) {
}
inline bool WireFormat::ReadString(io::CodedInputStream* input, string* value) {
- // WARNING: In wire_format.cc, both strings and bytes are handled by
- // ReadString() to avoid code duplication. If the implementations become
- // different, you will need to update that usage.
+ // String is for UTF-8 text only
uint32 length;
if (!input->ReadVarint32(&length)) return false;
- return input->ReadString(value, length);
+ if (!input->ReadString(value, length)) return false;
+#ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
+ if (!IsStructurallyValidUTF8(value->data(), length)) {
+ GOOGLE_LOG(ERROR) << "Encountered string containing invalid UTF-8 data while "
+ "parsing protocol buffer. Strings must contain only UTF-8; "
+ "use the 'bytes' type for raw bytes.";
+ }
+#endif // GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
+ return true;
}
inline bool WireFormat::ReadBytes(io::CodedInputStream* input, string* value) {
uint32 length;
@@ -270,9 +283,14 @@ inline bool WireFormat::WriteEnum(int field_number, int value,
inline bool WireFormat::WriteString(int field_number, const string& value,
io::CodedOutputStream* output) {
- // WARNING: In wire_format.cc, both strings and bytes are handled by
- // WriteString() to avoid code duplication. If the implementations become
- // different, you will need to update that usage.
+ // String is for UTF-8 text only
+#ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
+ if (!IsStructurallyValidUTF8(value.data(), value.size())) {
+ GOOGLE_LOG(ERROR) << "Encountered string containing invalid UTF-8 data while "
+ "serializing protocol buffer. Strings must contain only UTF-8; "
+ "use the 'bytes' type for raw bytes.";
+ }
+#endif // GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
return WriteTag(field_number, WIRETYPE_LENGTH_DELIMITED, output) &&
output->WriteVarint32(value.size()) &&
output->WriteString(value);