diff options
Diffstat (limited to 'src/google/protobuf/message_lite.cc')
-rw-r--r-- | src/google/protobuf/message_lite.cc | 411 |
1 files changed, 263 insertions, 148 deletions
diff --git a/src/google/protobuf/message_lite.cc b/src/google/protobuf/message_lite.cc index 33ee6323..01bb0d39 100644 --- a/src/google/protobuf/message_lite.cc +++ b/src/google/protobuf/message_lite.cc @@ -80,7 +80,8 @@ void ByteSizeConsistencyError(size_t byte_size_before_serialization, GOOGLE_CHECK_EQ(bytes_produced_by_serialization, byte_size_before_serialization) << "Byte size calculation and serialization were inconsistent. This " "may indicate a bug in protocol buffers or it may be caused by " - "concurrent modification of " << message.GetTypeName() << "."; + "concurrent modification of " + << message.GetTypeName() << "."; GOOGLE_LOG(FATAL) << "This shouldn't be called if all the sizes are equal."; } @@ -108,12 +109,16 @@ string InitializationErrorMessage(const char* action, #if GOOGLE_PROTOBUF_ENABLE_EXPERIMENTAL_PARSER // This is wrapper to turn a ZeroCopyInputStream (ZCIS) into a // InputStreamWithOverlap. This is done by copying data around the seams, -// pictorially if ZCIS presents a stream in chunks like so +// hence the name EpsCopyInputStream, pictorially if ZCIS presents a stream +// in chunks like so // [---------------------------------------------------------------] // [---------------------] chunk 1 // [----------------------------] chunk 2 // chunk 3 [--------------] -// this class will convert this into chunks +// where '-' depicts bytes of the stream or chunks vertically alligned with the +// corresponding bytes between stream and chunk. +// +// This class will convert this into chunks // [-----------------....] chunk 1 // [----....] patch // [------------------------....] chunk 2 @@ -121,17 +126,58 @@ string InitializationErrorMessage(const char* action, // chunk 3 [----------....] // patch [----****] // by using a fixed size buffer to patch over the seams. This requires -// copying of an "epsilon" neighboorhood around the seams. - +// copying of an "epsilon" neighboorhood around the seams. In the picture above +// dots mean bytes beyond the end of the new chunks. Each chunk is kSlopBytes +// smalller as its original chunk (above depicted as 4 dots) and the number of +// of chunks is doubled because each seam in the original stream introduces a +// new patch. +// +// The algorithm is simple but not entirely trivial. Two complications arise +// 1) The original chunk could be less than kSlopBytes. Hence we can't simply +// chop the last kSlopBytes of a chunk. +// 2) We need to leave the underlying CodedInputStream (CIS) precisely at the +// last byte read in the parse. In most cases a parse ends on a limit or end of +// the ZeroCopyInputStream, which is not problematic because CIS will never give +// us data beyond that. But the parse can end on a 0 end tag or an end group. +// If that happens in the first kSlopBytes of the patch (which are copied +// from the previous buffer) the CIS has already moved to the next chunk to +// copy the remaining bytes of the patch buffer. There exist no API to rollback +// to a previous buffer. +// +// We model this as a state machine. A call to get the next chunk either returns +// an original chunk except the last kSlopBytes or it has to copy the last +// kSlopBytes of the current chunk to the patch buffer and copy the first +// kSlopBytes of the next chunk to the end of the patch buffer. +// +// In order to deal with problem 1, we need to deal with the case that a new +// chunk can be less or equal than kSlopBytes big. We can just copy the chunk +// to the end and return (buffer, chunk->size). Pictorially +// [--------] chunk 1 +// [--] chunk 2 +// [---] chunk 3 +// will become +// [----....] chunk 1 +// [--....] patch (not full range of the buffer, only two hyphens) +// [--] chunk 2 (too small so never returned as buffer) +// [---....] patch (not full range of the buffer, only three hyphens) +// [---] chunk 3 (too small so never returned as buffer) +// [----****] patch (full range, last bytes are garbage) +// Because of this the source (the dots in above) can overlap with the +// destination buffer and so we have to use memmove. +// +// To solve problem 2, we verify after copying the last kSlopBytes the parse +// won't end before we continue to get the next chunk. template <int kSlopBytes> class EpsCopyInputStream { public: EpsCopyInputStream(io::CodedInputStream* input) : input_(input) {} ~EpsCopyInputStream() { - if (skip_) input_->Skip(skip_); + ABSL_ASSERT(skip_ >= 0); + input_->Skip(skip_); } - StringPiece NextWithOverlap() { + template <typename EnsureNotEnd> + StringPiece SafeNextWithOverlap(const EnsureNotEnd& ensure_not_end) { switch (next_state_) { case kEOS: // End of stream @@ -141,140 +187,118 @@ class EpsCopyInputStream { // To parse the last kSlopBytes we need to copy the bytes into the // buffer. Hence we set, next_state_ = kBuffer; + skip_ = chunk_.size() - kSlopBytes; return {chunk_.begin(), chunk_.size() - kSlopBytes}; - case kBuffer: + case kBuffer: { // We have to parse the last kSlopBytes of chunk_, which could alias // buffer_ so we have to memmove. std::memmove(buffer_, chunk_.end() - kSlopBytes, kSlopBytes); - chunk_ = GetChunk(); - if (chunk_.size() > kSlopBytes) { - next_state_ = kChunk; - std::memcpy(buffer_ + kSlopBytes, chunk_.begin(), kSlopBytes); - return {buffer_, kSlopBytes}; - } else if (chunk_.empty()) { + skip_ += kSlopBytes; + // We need to fill in the other half of buffer_ with the start of the + // next chunk. So we need to continue to the next buffer in the ZCIS, + // which makes it impossible to rollback to the current buffer :( + // We need to verify this won't happen. + if (!ensure_not_end(buffer_, kSlopBytes)) { + // We are guaranteed to exit in this interval. next_state_ = kEOS; return {buffer_, kSlopBytes}; - } else { - auto size = chunk_.size(); - // The next chunk is not big enough. So we copy it in the current - // after the current buffer. Resulting in a buffer with - // size + kSlopBytes bytes. - std::memcpy(buffer_ + kSlopBytes, chunk_.begin(), size); - chunk_ = {buffer_, size + kSlopBytes}; - return {buffer_, size}; - } - case kStart: { - size_t i = 0; - do { - chunk_ = GetChunk(); - if (chunk_.size() > kSlopBytes) { - if (i == 0) { - next_state_ = kBuffer; - return {chunk_.begin(), chunk_.size() - kSlopBytes}; - } - std::memcpy(buffer_ + i, chunk_.begin(), kSlopBytes); - next_state_ = kChunk; - return {buffer_, i}; - } - if (chunk_.empty()) { - next_state_ = kEOS; - return {buffer_, i}; - } - std::memcpy(buffer_ + i, chunk_.begin(), chunk_.size()); - i += chunk_.size(); - } while (i <= kSlopBytes); - chunk_ = {buffer_, i}; - next_state_ = kBuffer; - return {buffer_, i - kSlopBytes}; - } - } - } - - StringPiece NextWithOverlapEndingSafe(const char* ptr, int nesting) { - switch (next_state_) { - case kEOS: - // End of stream - return nullptr; - case kChunk: - // chunk_ contains a buffer of sufficient size (> kSlopBytes). - // To parse the last kSlopBytes we need to copy the bytes into the - // buffer. Hence we set, - next_state_ = kBuffer; - return {chunk_.begin(), chunk_.size() - kSlopBytes}; - case kBuffer: - // We have to parse the last kSlopBytes of chunk_, which could alias - // buffer_ so we have to memmove. - if (!SafeCopy(buffer_, chunk_.end() - kSlopBytes, nesting)) { - // We will terminate } chunk_ = GetChunk(); - if (chunk_.size() > kSlopBytes) { + auto size = chunk_.size(); + if (size > kSlopBytes) { next_state_ = kChunk; std::memcpy(buffer_ + kSlopBytes, chunk_.begin(), kSlopBytes); return {buffer_, kSlopBytes}; - } else if (chunk_.empty()) { + } else if (size == 0) { next_state_ = kEOS; return {buffer_, kSlopBytes}; } else { - auto size = chunk_.size(); + // next_state_ = kBuffer, but this is unnecessary + // The next chunk is not big enough. So we copy it in the current // after the current buffer. Resulting in a buffer with // size + kSlopBytes bytes. std::memcpy(buffer_ + kSlopBytes, chunk_.begin(), size); + // skip_ becomes negative here. + skip_ += size - kSlopBytes; chunk_ = {buffer_, size + kSlopBytes}; return {buffer_, size}; } + } case kStart: { + chunk_ = GetChunk(); + auto size = chunk_.size(); + if (PROTOBUF_PREDICT_TRUE(size > kSlopBytes)) { + next_state_ = kBuffer; + skip_ = size - kSlopBytes; + return {chunk_.begin(), size - kSlopBytes}; + } size_t i = 0; do { - chunk_ = GetChunk(); - if (chunk_.size() > kSlopBytes) { - if (i == 0) { - next_state_ = kBuffer; - return {chunk_.begin(), chunk_.size() - kSlopBytes}; - } - std::memcpy(buffer_ + i, chunk_.begin(), kSlopBytes); - next_state_ = kChunk; + if (size == 0) { + next_state_ = kEOS; return {buffer_, i}; } - if (chunk_.empty()) { + std::memcpy(buffer_ + i, chunk_.begin(), size); + ABSL_ASSERT(skip_ == 0); + skip_ = size; + i += size; + if (i > kSlopBytes) { + skip_ -= kSlopBytes; + chunk_ = {buffer_, i}; + next_state_ = kBuffer; + return {buffer_, i - kSlopBytes}; + } + if (!ensure_not_end(buffer_, i)) { next_state_ = kEOS; return {buffer_, i}; } - std::memcpy(buffer_ + i, chunk_.begin(), chunk_.size()); - i += chunk_.size(); - } while (i <= kSlopBytes); - chunk_ = {buffer_, i}; - next_state_ = kBuffer; - return {buffer_, i - kSlopBytes}; + chunk_ = GetChunk(); + size = chunk_.size(); + } while (size <= kSlopBytes); + std::memcpy(buffer_ + i, chunk_.begin(), kSlopBytes); + next_state_ = kChunk; + return {buffer_, i}; } } } - void Backup(const char* ptr) { skip_ = ptr - chunk_.data(); } + StringPiece NextWithOverlap() { + return SafeNextWithOverlap([](const char*, size_t) { return true; }); + } + + void AdjustPos(int delta) { + ABSL_ASSERT(delta <= kSlopBytes); + skip_ += delta; + } + + void SetError() { skip_ = 0; } private: io::CodedInputStream* input_; StringPiece chunk_; - char buffer_[2 * kSlopBytes]; + char buffer_[2 * kSlopBytes] = {}; enum State { - kEOS = 0, // -> end of stream. - kChunk = 1, // -> chunk_ contains the data for Next. - kBuffer = 2, // -> We need to copy the left over from previous chunk_ and - // load and patch the start of the next chunk in the - // local buffer. - kStart = 3, + kStart, + kEOS, // -> end of stream. + kChunk, // -> chunk_ contains the data for Next. + kBuffer, // -> We need to copy the left over from previous chunk_ and + // load and patch the start of the next chunk in the + // local buffer. }; State next_state_ = kStart; - int skip_ = 0; + int skip_ = 0; // how much bytes to skip to current position in the stream. StringPiece GetChunk() { const void* ptr; - if (skip_) input_->Skip(skip_); - if (!input_->GetDirectBufferPointer(&ptr, &skip_)) { + ABSL_ASSERT(skip_ >= 0); + input_->Skip(skip_); + skip_ = 0; + int size; + if (!input_->GetDirectBufferPointer(&ptr, &size)) { return nullptr; } - return StringPiece(static_cast<const char*>(ptr), skip_); + return StringPiece(static_cast<const char*>(ptr), size); } }; #endif @@ -287,29 +311,71 @@ class EpsCopyInputStream { // messages, every function call introduces significant overhead. To avoid // this without reproducing code, we use these forced-inline helpers. -inline bool InlineMergePartialFromCodedStream(io::CodedInputStream* input, +inline bool InlineMergeFromCodedStream(io::CodedInputStream* input, + MessageLite* message) { + if (!message->MergePartialFromCodedStream(input)) return false; + if (!message->IsInitialized()) { + GOOGLE_LOG(ERROR) << InitializationErrorMessage("parse", *message); + return false; + } + return true; +} + +inline bool InlineParsePartialFromCodedStream(io::CodedInputStream* input, MessageLite* message) { + message->Clear(); + return message->MergePartialFromCodedStream(input); +} + +inline bool InlineParseFromCodedStream(io::CodedInputStream* input, + MessageLite* message) { + message->Clear(); + return InlineMergeFromCodedStream(input, message); +} + #if GOOGLE_PROTOBUF_ENABLE_EXPERIMENTAL_PARSER - EpsCopyInputStream<internal::ParseContext::kSlopBytes> eps_input(input); +template <template <int> class Input> +inline bool InlineMergePartialEntireInput( + Input<internal::ParseContext::kSlopBytes>* input, MessageLite* message) { internal::ParseContext ctx; - auto res = ctx.ParseNoLimit({message->_ParseFunc(), message}, &eps_input); - if (res == 1) { - input->SetConsumed(); + + auto chunk = input->NextWithOverlap(); + if (chunk.empty()) { return true; - } else if (res == 2) { - return false; - } else { - input->SetLastTag(res); + } + auto res = ctx.StartParse({message->_ParseFunc(), message}, chunk); + while (res.first == internal::ParseContext::kContinue) { + int overrun = res.second; + chunk = input->NextWithOverlap(); + if (chunk.empty()) { + if (!ctx.ValidEnd(overrun)) return false; + return true; + } + res = ctx.ResumeParse(chunk, overrun); + } + // Either failure or ended on a zero or end-group tag + return false; +} +#endif + +inline bool InlineMergePartialEntireStream(io::CodedInputStream* cis, + MessageLite* message) { +#if GOOGLE_PROTOBUF_ENABLE_EXPERIMENTAL_PARSER + EpsCopyInputStream<internal::ParseContext::kSlopBytes> input(cis); + if (InlineMergePartialEntireInput(&input, message)) { + cis->SetConsumed(); return true; } + return false; #else - return message->MergePartialFromCodedStream(input); + return message->MergePartialFromCodedStream(cis) && + cis->ConsumedEntireMessage(); #endif } -inline bool InlineMergeFromCodedStream(io::CodedInputStream* input, - MessageLite* message) { - if (!InlineMergePartialFromCodedStream(input, message)) return false; +inline bool InlineMergeEntireStream(io::CodedInputStream* input, + MessageLite* message) { + if (!InlineMergePartialEntireStream(input, message)) return false; if (!message->IsInitialized()) { GOOGLE_LOG(ERROR) << InitializationErrorMessage("parse", *message); return false; @@ -317,16 +383,16 @@ inline bool InlineMergeFromCodedStream(io::CodedInputStream* input, return true; } -inline bool InlineParsePartialFromCodedStream(io::CodedInputStream* input, - MessageLite* message) { +inline bool InlineParsePartialEntireStream(io::CodedInputStream* input, + MessageLite* message) { message->Clear(); - return InlineMergePartialFromCodedStream(input, message); + return InlineMergePartialEntireStream(input, message); } -inline bool InlineParseFromCodedStream(io::CodedInputStream* input, - MessageLite* message) { +inline bool InlineParseEntireStream(io::CodedInputStream* input, + MessageLite* message) { message->Clear(); - return InlineMergeFromCodedStream(input, message); + return InlineMergeEntireStream(input, message); } #if GOOGLE_PROTOBUF_ENABLE_EXPERIMENTAL_PARSER @@ -334,30 +400,49 @@ template <int kSlopBytes> class ArrayInput { public: ArrayInput(StringPiece chunk) : chunk_(chunk) {} + StringPiece NextWithOverlap() { - auto res = chunk_; - chunk_ = nullptr; - return res; + auto s = chunk_.size(); + if (s > 16) { + auto res = chunk_.substr(0, s - 16); + chunk_ = chunk_.substr(s - 16); + return res; + } else if (s == 0) { + return nullptr; + } else { + std::memcpy(buffer_, chunk_.begin(), s); + chunk_ = nullptr; + return {buffer_, s}; + } } - void Backup(const char*) { GOOGLE_CHECK(false) << "Can't backup arrayinput"; } + void SetError() {} private: StringPiece chunk_; + char buffer_[32] = {}; + int state_ = 0; }; #endif inline bool InlineMergePartialFromArray(const void* data, int size, - MessageLite* message, + MessageLite* msg, bool aliasing = false) { #if GOOGLE_PROTOBUF_ENABLE_EXPERIMENTAL_PARSER - internal::ParseContext ctx; + auto begin = static_cast<const char*>(data); + if (aliasing) { + // TODO(gerbens) make this safe against corruption buffer overflow. + // Short cut to allow aliasing string_piece + internal::ParseContext ctx; + ctx.extra_parse_data().aliasing = true; + return ctx.ParseExactRange({msg->_ParseFunc(), msg}, begin, begin + size); + } ArrayInput<internal::ParseContext::kSlopBytes> input( - StringPiece(static_cast<const char*>(data), size)); - return ctx.Parse({message->_ParseFunc(), message}, size, &input); + StringPiece(begin, size)); + return InlineMergePartialEntireInput(&input, msg); #else io::CodedInputStream input(static_cast<const uint8*>(data), size); - return message->MergePartialFromCodedStream(&input) && + return msg->MergePartialFromCodedStream(&input) && input.ConsumedEntireMessage(); #endif } @@ -400,8 +485,44 @@ MessageLite* MessageLite::New(Arena* arena) const { } #if GOOGLE_PROTOBUF_ENABLE_EXPERIMENTAL_PARSER -bool MessageLite::MergePartialFromCodedStream(io::CodedInputStream* input) { - return InlineMergePartialFromCodedStream(input, this); +bool MessageLite::MergePartialFromCodedStream(io::CodedInputStream* cis) { + EpsCopyInputStream<internal::ParseContext::kSlopBytes> input(cis); + internal::ParseContext ctx(cis->RecursionBudget()); + ctx.extra_parse_data().pool = cis->GetExtensionPool(); + ctx.extra_parse_data().factory = cis->GetExtensionFactory(); + + auto chunk = input.SafeNextWithOverlap( + [&ctx](const char* ptr, int n) { return ctx.EnsureNoEnd(ptr, n, 0); }); + if (chunk.empty()) { + cis->SetConsumed(); + return true; + } + auto res = ctx.StartParse({_ParseFunc(), this}, chunk); + while (res.first == internal::ParseContext::kContinue) { + int overrun = res.second; + chunk = input.SafeNextWithOverlap([&ctx, overrun](const char* ptr, int n) { + return ctx.EnsureNoEnd(ptr, n, overrun); + }); + if (chunk.empty()) { + if (!ctx.ValidEnd(overrun)) goto error; + cis->SetConsumed(); + return true; + } + res = ctx.ResumeParse(chunk, overrun); + } + // Either failure or ended on a zero or end-group tag + if (res.first != internal::ParseContext::kFailure) { + cis->SetLastTag(res.first); + input.AdjustPos(res.second); + return true; + } +error: + // Error can happen at a spot from which we can't back up. But in this case + // the user can't resume the stream as the error could be in an arbitrary + // location in the stream, so just leave the stream alone. This prevents + // triggering assertion fail. + input.SetError(); + return false; } #endif @@ -419,22 +540,20 @@ bool MessageLite::ParsePartialFromCodedStream(io::CodedInputStream* input) { bool MessageLite::ParseFromZeroCopyStream(io::ZeroCopyInputStream* input) { io::CodedInputStream decoder(input); - return ParseFromCodedStream(&decoder) && decoder.ConsumedEntireMessage(); + return InlineParseEntireStream(&decoder, this); } bool MessageLite::ParsePartialFromZeroCopyStream( io::ZeroCopyInputStream* input) { io::CodedInputStream decoder(input); - return ParsePartialFromCodedStream(&decoder) && - decoder.ConsumedEntireMessage(); + return InlineParsePartialEntireStream(&decoder, this); } -bool MessageLite::ParseFromBoundedZeroCopyStream( - io::ZeroCopyInputStream* input, int size) { +bool MessageLite::ParseFromBoundedZeroCopyStream(io::ZeroCopyInputStream* input, + int size) { io::CodedInputStream decoder(input); decoder.PushLimit(size); - return ParseFromCodedStream(&decoder) && - decoder.ConsumedEntireMessage() && + return InlineParseEntireStream(&decoder, this) && decoder.BytesUntilLimit() == 0; } @@ -442,8 +561,7 @@ bool MessageLite::ParsePartialFromBoundedZeroCopyStream( io::ZeroCopyInputStream* input, int size) { io::CodedInputStream decoder(input); decoder.PushLimit(size); - return ParsePartialFromCodedStream(&decoder) && - decoder.ConsumedEntireMessage() && + return InlineParsePartialEntireStream(&decoder, this) && decoder.BytesUntilLimit() == 0; } @@ -480,8 +598,8 @@ bool MessageLite::SerializePartialToCodedStream( io::CodedOutputStream* output) const { const size_t size = ByteSizeLong(); // Force size to be cached. if (size > INT_MAX) { - GOOGLE_LOG(ERROR) << GetTypeName() << " exceeded maximum protobuf size of 2GB: " - << size; + GOOGLE_LOG(ERROR) << GetTypeName() + << " exceeded maximum protobuf size of 2GB: " << size; return false; } @@ -531,8 +649,8 @@ bool MessageLite::AppendPartialToString(string* output) const { size_t old_size = output->size(); size_t byte_size = ByteSizeLong(); if (byte_size > INT_MAX) { - GOOGLE_LOG(ERROR) << GetTypeName() << " exceeded maximum protobuf size of 2GB: " - << byte_size; + GOOGLE_LOG(ERROR) << GetTypeName() + << " exceeded maximum protobuf size of 2GB: " << byte_size; return false; } @@ -564,8 +682,8 @@ bool MessageLite::SerializeToArray(void* data, int size) const { bool MessageLite::SerializePartialToArray(void* data, int size) const { const size_t byte_size = ByteSizeLong(); if (byte_size > INT_MAX) { - GOOGLE_LOG(ERROR) << GetTypeName() << " exceeded maximum protobuf size of 2GB: " - << byte_size; + GOOGLE_LOG(ERROR) << GetTypeName() + << " exceeded maximum protobuf size of 2GB: " << byte_size; return false; } if (size < byte_size) return false; @@ -583,15 +701,13 @@ string MessageLite::SerializeAsString() const { // of this function, but will be overlaid with the object that the // caller supplied for the return value to be constructed in. string output; - if (!AppendToString(&output)) - output.clear(); + if (!AppendToString(&output)) output.clear(); return output; } string MessageLite::SerializePartialAsString() const { string output; - if (!AppendPartialToString(&output)) - output.clear(); + if (!AppendPartialToString(&output)) output.clear(); return output; } @@ -640,9 +756,8 @@ void GenericTypeHandler<MessageLite>::Merge(const MessageLite& from, MessageLite* to) { to->CheckTypeAndMergeFrom(from); } -template<> -void GenericTypeHandler<string>::Merge(const string& from, - string* to) { +template <> +void GenericTypeHandler<string>::Merge(const string& from, string* to) { *to = from; } |