From d61aede89cf188367766b971f59cf57d7835d8e8 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Tue, 4 Sep 2018 10:58:54 -0700 Subject: Down-integrate from google3. --- src/google/protobuf/message_lite.cc | 411 +++++++++++++++++++++++------------- 1 file changed, 263 insertions(+), 148 deletions(-) (limited to 'src/google/protobuf/message_lite.cc') diff --git a/src/google/protobuf/message_lite.cc b/src/google/protobuf/message_lite.cc index 33ee6323..01bb0d39 100644 --- a/src/google/protobuf/message_lite.cc +++ b/src/google/protobuf/message_lite.cc @@ -80,7 +80,8 @@ void ByteSizeConsistencyError(size_t byte_size_before_serialization, GOOGLE_CHECK_EQ(bytes_produced_by_serialization, byte_size_before_serialization) << "Byte size calculation and serialization were inconsistent. This " "may indicate a bug in protocol buffers or it may be caused by " - "concurrent modification of " << message.GetTypeName() << "."; + "concurrent modification of " + << message.GetTypeName() << "."; GOOGLE_LOG(FATAL) << "This shouldn't be called if all the sizes are equal."; } @@ -108,12 +109,16 @@ string InitializationErrorMessage(const char* action, #if GOOGLE_PROTOBUF_ENABLE_EXPERIMENTAL_PARSER // This is wrapper to turn a ZeroCopyInputStream (ZCIS) into a // InputStreamWithOverlap. This is done by copying data around the seams, -// pictorially if ZCIS presents a stream in chunks like so +// hence the name EpsCopyInputStream, pictorially if ZCIS presents a stream +// in chunks like so // [---------------------------------------------------------------] // [---------------------] chunk 1 // [----------------------------] chunk 2 // chunk 3 [--------------] -// this class will convert this into chunks +// where '-' depicts bytes of the stream or chunks vertically alligned with the +// corresponding bytes between stream and chunk. +// +// This class will convert this into chunks // [-----------------....] chunk 1 // [----....] patch // [------------------------....] chunk 2 @@ -121,17 +126,58 @@ string InitializationErrorMessage(const char* action, // chunk 3 [----------....] // patch [----****] // by using a fixed size buffer to patch over the seams. This requires -// copying of an "epsilon" neighboorhood around the seams. - +// copying of an "epsilon" neighboorhood around the seams. In the picture above +// dots mean bytes beyond the end of the new chunks. Each chunk is kSlopBytes +// smalller as its original chunk (above depicted as 4 dots) and the number of +// of chunks is doubled because each seam in the original stream introduces a +// new patch. +// +// The algorithm is simple but not entirely trivial. Two complications arise +// 1) The original chunk could be less than kSlopBytes. Hence we can't simply +// chop the last kSlopBytes of a chunk. +// 2) We need to leave the underlying CodedInputStream (CIS) precisely at the +// last byte read in the parse. In most cases a parse ends on a limit or end of +// the ZeroCopyInputStream, which is not problematic because CIS will never give +// us data beyond that. But the parse can end on a 0 end tag or an end group. +// If that happens in the first kSlopBytes of the patch (which are copied +// from the previous buffer) the CIS has already moved to the next chunk to +// copy the remaining bytes of the patch buffer. There exist no API to rollback +// to a previous buffer. +// +// We model this as a state machine. A call to get the next chunk either returns +// an original chunk except the last kSlopBytes or it has to copy the last +// kSlopBytes of the current chunk to the patch buffer and copy the first +// kSlopBytes of the next chunk to the end of the patch buffer. +// +// In order to deal with problem 1, we need to deal with the case that a new +// chunk can be less or equal than kSlopBytes big. We can just copy the chunk +// to the end and return (buffer, chunk->size). Pictorially +// [--------] chunk 1 +// [--] chunk 2 +// [---] chunk 3 +// will become +// [----....] chunk 1 +// [--....] patch (not full range of the buffer, only two hyphens) +// [--] chunk 2 (too small so never returned as buffer) +// [---....] patch (not full range of the buffer, only three hyphens) +// [---] chunk 3 (too small so never returned as buffer) +// [----****] patch (full range, last bytes are garbage) +// Because of this the source (the dots in above) can overlap with the +// destination buffer and so we have to use memmove. +// +// To solve problem 2, we verify after copying the last kSlopBytes the parse +// won't end before we continue to get the next chunk. template class EpsCopyInputStream { public: EpsCopyInputStream(io::CodedInputStream* input) : input_(input) {} ~EpsCopyInputStream() { - if (skip_) input_->Skip(skip_); + ABSL_ASSERT(skip_ >= 0); + input_->Skip(skip_); } - StringPiece NextWithOverlap() { + template + StringPiece SafeNextWithOverlap(const EnsureNotEnd& ensure_not_end) { switch (next_state_) { case kEOS: // End of stream @@ -141,140 +187,118 @@ class EpsCopyInputStream { // To parse the last kSlopBytes we need to copy the bytes into the // buffer. Hence we set, next_state_ = kBuffer; + skip_ = chunk_.size() - kSlopBytes; return {chunk_.begin(), chunk_.size() - kSlopBytes}; - case kBuffer: + case kBuffer: { // We have to parse the last kSlopBytes of chunk_, which could alias // buffer_ so we have to memmove. std::memmove(buffer_, chunk_.end() - kSlopBytes, kSlopBytes); - chunk_ = GetChunk(); - if (chunk_.size() > kSlopBytes) { - next_state_ = kChunk; - std::memcpy(buffer_ + kSlopBytes, chunk_.begin(), kSlopBytes); - return {buffer_, kSlopBytes}; - } else if (chunk_.empty()) { + skip_ += kSlopBytes; + // We need to fill in the other half of buffer_ with the start of the + // next chunk. So we need to continue to the next buffer in the ZCIS, + // which makes it impossible to rollback to the current buffer :( + // We need to verify this won't happen. + if (!ensure_not_end(buffer_, kSlopBytes)) { + // We are guaranteed to exit in this interval. next_state_ = kEOS; return {buffer_, kSlopBytes}; - } else { - auto size = chunk_.size(); - // The next chunk is not big enough. So we copy it in the current - // after the current buffer. Resulting in a buffer with - // size + kSlopBytes bytes. - std::memcpy(buffer_ + kSlopBytes, chunk_.begin(), size); - chunk_ = {buffer_, size + kSlopBytes}; - return {buffer_, size}; - } - case kStart: { - size_t i = 0; - do { - chunk_ = GetChunk(); - if (chunk_.size() > kSlopBytes) { - if (i == 0) { - next_state_ = kBuffer; - return {chunk_.begin(), chunk_.size() - kSlopBytes}; - } - std::memcpy(buffer_ + i, chunk_.begin(), kSlopBytes); - next_state_ = kChunk; - return {buffer_, i}; - } - if (chunk_.empty()) { - next_state_ = kEOS; - return {buffer_, i}; - } - std::memcpy(buffer_ + i, chunk_.begin(), chunk_.size()); - i += chunk_.size(); - } while (i <= kSlopBytes); - chunk_ = {buffer_, i}; - next_state_ = kBuffer; - return {buffer_, i - kSlopBytes}; - } - } - } - - StringPiece NextWithOverlapEndingSafe(const char* ptr, int nesting) { - switch (next_state_) { - case kEOS: - // End of stream - return nullptr; - case kChunk: - // chunk_ contains a buffer of sufficient size (> kSlopBytes). - // To parse the last kSlopBytes we need to copy the bytes into the - // buffer. Hence we set, - next_state_ = kBuffer; - return {chunk_.begin(), chunk_.size() - kSlopBytes}; - case kBuffer: - // We have to parse the last kSlopBytes of chunk_, which could alias - // buffer_ so we have to memmove. - if (!SafeCopy(buffer_, chunk_.end() - kSlopBytes, nesting)) { - // We will terminate } chunk_ = GetChunk(); - if (chunk_.size() > kSlopBytes) { + auto size = chunk_.size(); + if (size > kSlopBytes) { next_state_ = kChunk; std::memcpy(buffer_ + kSlopBytes, chunk_.begin(), kSlopBytes); return {buffer_, kSlopBytes}; - } else if (chunk_.empty()) { + } else if (size == 0) { next_state_ = kEOS; return {buffer_, kSlopBytes}; } else { - auto size = chunk_.size(); + // next_state_ = kBuffer, but this is unnecessary + // The next chunk is not big enough. So we copy it in the current // after the current buffer. Resulting in a buffer with // size + kSlopBytes bytes. std::memcpy(buffer_ + kSlopBytes, chunk_.begin(), size); + // skip_ becomes negative here. + skip_ += size - kSlopBytes; chunk_ = {buffer_, size + kSlopBytes}; return {buffer_, size}; } + } case kStart: { + chunk_ = GetChunk(); + auto size = chunk_.size(); + if (PROTOBUF_PREDICT_TRUE(size > kSlopBytes)) { + next_state_ = kBuffer; + skip_ = size - kSlopBytes; + return {chunk_.begin(), size - kSlopBytes}; + } size_t i = 0; do { - chunk_ = GetChunk(); - if (chunk_.size() > kSlopBytes) { - if (i == 0) { - next_state_ = kBuffer; - return {chunk_.begin(), chunk_.size() - kSlopBytes}; - } - std::memcpy(buffer_ + i, chunk_.begin(), kSlopBytes); - next_state_ = kChunk; + if (size == 0) { + next_state_ = kEOS; return {buffer_, i}; } - if (chunk_.empty()) { + std::memcpy(buffer_ + i, chunk_.begin(), size); + ABSL_ASSERT(skip_ == 0); + skip_ = size; + i += size; + if (i > kSlopBytes) { + skip_ -= kSlopBytes; + chunk_ = {buffer_, i}; + next_state_ = kBuffer; + return {buffer_, i - kSlopBytes}; + } + if (!ensure_not_end(buffer_, i)) { next_state_ = kEOS; return {buffer_, i}; } - std::memcpy(buffer_ + i, chunk_.begin(), chunk_.size()); - i += chunk_.size(); - } while (i <= kSlopBytes); - chunk_ = {buffer_, i}; - next_state_ = kBuffer; - return {buffer_, i - kSlopBytes}; + chunk_ = GetChunk(); + size = chunk_.size(); + } while (size <= kSlopBytes); + std::memcpy(buffer_ + i, chunk_.begin(), kSlopBytes); + next_state_ = kChunk; + return {buffer_, i}; } } } - void Backup(const char* ptr) { skip_ = ptr - chunk_.data(); } + StringPiece NextWithOverlap() { + return SafeNextWithOverlap([](const char*, size_t) { return true; }); + } + + void AdjustPos(int delta) { + ABSL_ASSERT(delta <= kSlopBytes); + skip_ += delta; + } + + void SetError() { skip_ = 0; } private: io::CodedInputStream* input_; StringPiece chunk_; - char buffer_[2 * kSlopBytes]; + char buffer_[2 * kSlopBytes] = {}; enum State { - kEOS = 0, // -> end of stream. - kChunk = 1, // -> chunk_ contains the data for Next. - kBuffer = 2, // -> We need to copy the left over from previous chunk_ and - // load and patch the start of the next chunk in the - // local buffer. - kStart = 3, + kStart, + kEOS, // -> end of stream. + kChunk, // -> chunk_ contains the data for Next. + kBuffer, // -> We need to copy the left over from previous chunk_ and + // load and patch the start of the next chunk in the + // local buffer. }; State next_state_ = kStart; - int skip_ = 0; + int skip_ = 0; // how much bytes to skip to current position in the stream. StringPiece GetChunk() { const void* ptr; - if (skip_) input_->Skip(skip_); - if (!input_->GetDirectBufferPointer(&ptr, &skip_)) { + ABSL_ASSERT(skip_ >= 0); + input_->Skip(skip_); + skip_ = 0; + int size; + if (!input_->GetDirectBufferPointer(&ptr, &size)) { return nullptr; } - return StringPiece(static_cast(ptr), skip_); + return StringPiece(static_cast(ptr), size); } }; #endif @@ -287,29 +311,71 @@ class EpsCopyInputStream { // messages, every function call introduces significant overhead. To avoid // this without reproducing code, we use these forced-inline helpers. -inline bool InlineMergePartialFromCodedStream(io::CodedInputStream* input, +inline bool InlineMergeFromCodedStream(io::CodedInputStream* input, + MessageLite* message) { + if (!message->MergePartialFromCodedStream(input)) return false; + if (!message->IsInitialized()) { + GOOGLE_LOG(ERROR) << InitializationErrorMessage("parse", *message); + return false; + } + return true; +} + +inline bool InlineParsePartialFromCodedStream(io::CodedInputStream* input, MessageLite* message) { + message->Clear(); + return message->MergePartialFromCodedStream(input); +} + +inline bool InlineParseFromCodedStream(io::CodedInputStream* input, + MessageLite* message) { + message->Clear(); + return InlineMergeFromCodedStream(input, message); +} + #if GOOGLE_PROTOBUF_ENABLE_EXPERIMENTAL_PARSER - EpsCopyInputStream eps_input(input); +template