diff options
Diffstat (limited to 'src/google/protobuf/io/coded_stream.cc')
-rw-r--r-- | src/google/protobuf/io/coded_stream.cc | 379 |
1 files changed, 181 insertions, 198 deletions
diff --git a/src/google/protobuf/io/coded_stream.cc b/src/google/protobuf/io/coded_stream.cc index e17a4775..6a91a13d 100644 --- a/src/google/protobuf/io/coded_stream.cc +++ b/src/google/protobuf/io/coded_stream.cc @@ -38,9 +38,9 @@ // will not cross the end of the buffer, since we can avoid a lot // of branching in this case. -#include <stack> +#include <google/protobuf/io/coded_stream_inl.h> +#include <algorithm> #include <limits.h> -#include <google/protobuf/io/coded_stream.h> #include <google/protobuf/io/zero_copy_stream.h> #include <google/protobuf/stubs/common.h> #include <google/protobuf/stubs/stl_util-inl.h> @@ -52,11 +52,6 @@ namespace io { namespace { -static const int kDefaultTotalBytesLimit = 64 << 20; // 64MB - -static const int kDefaultTotalBytesWarningThreshold = 32 << 20; // 32MB -static const int kDefaultRecursionLimit = 64; - static const int kMaxVarintBytes = 10; static const int kMaxVarint32Bytes = 5; @@ -65,72 +60,28 @@ static const int kMaxVarint32Bytes = 5; // CodedInputStream ================================================== -CodedInputStream::CodedInputStream(ZeroCopyInputStream* input) - : input_(input), - buffer_(NULL), - buffer_size_(0), - total_bytes_read_(0), - overflow_bytes_(0), - last_tag_(0), - legitimate_message_end_(false), - aliasing_enabled_(false), - current_limit_(INT_MAX), - buffer_size_after_limit_(0), - total_bytes_limit_(kDefaultTotalBytesLimit), - total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold), - recursion_depth_(0), - recursion_limit_(kDefaultRecursionLimit) { - // Eagerly Refresh() so buffer space is immediately available. - Refresh(); -} - -CodedInputStream::CodedInputStream(const uint8* buffer, int size) - : input_(NULL), - buffer_(buffer), - buffer_size_(size), - total_bytes_read_(size), - overflow_bytes_(0), - last_tag_(0), - legitimate_message_end_(false), - aliasing_enabled_(false), - current_limit_(size), - buffer_size_after_limit_(0), - total_bytes_limit_(kDefaultTotalBytesLimit), - total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold), - recursion_depth_(0), - recursion_limit_(kDefaultRecursionLimit) { - // Note that setting current_limit_ == size is important to prevent some - // code paths from trying to access input_ and segfaulting. -} - -CodedInputStream::~CodedInputStream() { - if (input_ != NULL) { - BackUpInputToCurrentPosition(); - } -} - void CodedInputStream::BackUpInputToCurrentPosition() { - int backup_bytes = buffer_size_ + buffer_size_after_limit_ + overflow_bytes_; + int backup_bytes = BufferSize() + buffer_size_after_limit_ + overflow_bytes_; if (backup_bytes > 0) { input_->BackUp(backup_bytes); // total_bytes_read_ doesn't include overflow_bytes_. - total_bytes_read_ -= buffer_size_ + buffer_size_after_limit_; - buffer_size_ = 0; + total_bytes_read_ -= BufferSize() + buffer_size_after_limit_; + buffer_end_ = buffer_; buffer_size_after_limit_ = 0; overflow_bytes_ = 0; } } inline void CodedInputStream::RecomputeBufferLimits() { - buffer_size_ += buffer_size_after_limit_; + buffer_end_ += buffer_size_after_limit_; int closest_limit = min(current_limit_, total_bytes_limit_); if (closest_limit < total_bytes_read_) { // The limit position is in the current buffer. We must adjust // the buffer size accordingly. buffer_size_after_limit_ = total_bytes_read_ - closest_limit; - buffer_size_ -= buffer_size_after_limit_; + buffer_end_ -= buffer_size_after_limit_; } else { buffer_size_after_limit_ = 0; } @@ -139,7 +90,7 @@ inline void CodedInputStream::RecomputeBufferLimits() { CodedInputStream::Limit CodedInputStream::PushLimit(int byte_limit) { // Current position relative to the beginning of the stream. int current_position = total_bytes_read_ - - (buffer_size_ + buffer_size_after_limit_); + (BufferSize() + buffer_size_after_limit_); Limit old_limit = current_limit_; @@ -176,7 +127,7 @@ void CodedInputStream::PopLimit(Limit limit) { int CodedInputStream::BytesUntilLimit() { if (current_limit_ == INT_MAX) return -1; int current_position = total_bytes_read_ - - (buffer_size_ + buffer_size_after_limit_); + (BufferSize() + buffer_size_after_limit_); return current_limit_ - current_position; } @@ -186,7 +137,7 @@ void CodedInputStream::SetTotalBytesLimit( // Make sure the limit isn't already past, since this could confuse other // code. int current_position = total_bytes_read_ - - (buffer_size_ + buffer_size_after_limit_); + (BufferSize() + buffer_size_after_limit_); total_bytes_limit_ = max(current_position, total_bytes_limit); total_bytes_warning_threshold_ = warning_threshold; RecomputeBufferLimits(); @@ -203,7 +154,9 @@ void CodedInputStream::PrintTotalBytesLimitError() { bool CodedInputStream::Skip(int count) { if (count < 0) return false; // security: count is often user-supplied - if (count <= buffer_size_) { + const int original_buffer_size = BufferSize(); + + if (count <= original_buffer_size) { // Just skipping within the current buffer. Easy. Advance(count); return true; @@ -211,13 +164,13 @@ bool CodedInputStream::Skip(int count) { if (buffer_size_after_limit_ > 0) { // We hit a limit inside this buffer. Advance to the limit and fail. - Advance(buffer_size_); + Advance(original_buffer_size); return false; } - count -= buffer_size_; + count -= original_buffer_size; buffer_ = NULL; - buffer_size_ = 0; + buffer_end_ = buffer_; // Make sure this skip doesn't try to skip past the current limit. int closest_limit = min(current_limit_, total_bytes_limit_); @@ -236,20 +189,21 @@ bool CodedInputStream::Skip(int count) { } bool CodedInputStream::GetDirectBufferPointer(const void** data, int* size) { - if (buffer_size_ == 0 && !Refresh()) return false; + if (BufferSize() == 0 && !Refresh()) return false; *data = buffer_; - *size = buffer_size_; + *size = BufferSize(); return true; } bool CodedInputStream::ReadRaw(void* buffer, int size) { - while (buffer_size_ < size) { + int current_buffer_size; + while ((current_buffer_size = BufferSize()) < size) { // Reading past end of buffer. Copy what we have, then refresh. - memcpy(buffer, buffer_, buffer_size_); - buffer = reinterpret_cast<uint8*>(buffer) + buffer_size_; - size -= buffer_size_; - Advance(buffer_size_); + memcpy(buffer, buffer_, current_buffer_size); + buffer = reinterpret_cast<uint8*>(buffer) + current_buffer_size; + size -= current_buffer_size; + Advance(current_buffer_size); if (!Refresh()) return false; } @@ -261,27 +215,25 @@ bool CodedInputStream::ReadRaw(void* buffer, int size) { bool CodedInputStream::ReadString(string* buffer, int size) { if (size < 0) return false; // security: size is often user-supplied + return InternalReadStringInline(buffer, size); +} +bool CodedInputStream::ReadStringFallback(string* buffer, int size) { if (!buffer->empty()) { buffer->clear(); } - if (size < buffer_size_) { - STLStringResizeUninitialized(buffer, size); - memcpy((uint8*)buffer->data(), buffer_, size); - Advance(size); - return true; - } - - while (buffer_size_ < size) { + int current_buffer_size; + while ((current_buffer_size = BufferSize()) < size) { // Some STL implementations "helpfully" crash on buffer->append(NULL, 0). - if (buffer_size_ != 0) { + if (current_buffer_size != 0) { // Note: string1.append(string2) is O(string2.size()) (as opposed to // O(string1.size() + string2.size()), which would be bad). - buffer->append(reinterpret_cast<const char*>(buffer_), buffer_size_); + buffer->append(reinterpret_cast<const char*>(buffer_), + current_buffer_size); } - size -= buffer_size_; - Advance(buffer_size_); + size -= current_buffer_size; + Advance(current_buffer_size); if (!Refresh()) return false; } @@ -292,11 +244,11 @@ bool CodedInputStream::ReadString(string* buffer, int size) { } -bool CodedInputStream::ReadLittleEndian32(uint32* value) { +bool CodedInputStream::ReadLittleEndian32Fallback(uint32* value) { uint8 bytes[sizeof(*value)]; const uint8* ptr; - if (buffer_size_ >= sizeof(*value)) { + if (BufferSize() >= sizeof(*value)) { // Fast path: Enough bytes in the buffer to read directly. ptr = buffer_; Advance(sizeof(*value)); @@ -305,19 +257,15 @@ bool CodedInputStream::ReadLittleEndian32(uint32* value) { if (!ReadRaw(bytes, sizeof(*value))) return false; ptr = bytes; } - - *value = (static_cast<uint32>(ptr[0]) ) | - (static_cast<uint32>(ptr[1]) << 8) | - (static_cast<uint32>(ptr[2]) << 16) | - (static_cast<uint32>(ptr[3]) << 24); + ReadLittleEndian32FromArray(ptr, value); return true; } -bool CodedInputStream::ReadLittleEndian64(uint64* value) { +bool CodedInputStream::ReadLittleEndian64Fallback(uint64* value) { uint8 bytes[sizeof(*value)]; const uint8* ptr; - if (buffer_size_ >= sizeof(*value)) { + if (BufferSize() >= sizeof(*value)) { // Fast path: Enough bytes in the buffer to read directly. ptr = buffer_; Advance(sizeof(*value)); @@ -326,99 +274,152 @@ bool CodedInputStream::ReadLittleEndian64(uint64* value) { if (!ReadRaw(bytes, sizeof(*value))) return false; ptr = bytes; } + ReadLittleEndian64FromArray(ptr, value); + return true; +} + +namespace { + +inline const uint8* ReadVarint32FromArray( + const uint8* buffer, uint32* value) GOOGLE_ATTRIBUTE_ALWAYS_INLINE; +inline const uint8* ReadVarint32FromArray(const uint8* buffer, uint32* value) { + // Fast path: We have enough bytes left in the buffer to guarantee that + // this read won't cross the end, so we can skip the checks. + const uint8* ptr = buffer; + uint32 b; + uint32 result; + + b = *(ptr++); result = (b & 0x7F) ; if (!(b & 0x80)) goto done; + b = *(ptr++); result |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done; + b = *(ptr++); result |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done; + b = *(ptr++); result |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done; + b = *(ptr++); result |= b << 28; if (!(b & 0x80)) goto done; + + // If the input is larger than 32 bits, we still need to read it all + // and discard the high-order bits. + for (int i = 0; i < kMaxVarintBytes - kMaxVarint32Bytes; i++) { + b = *(ptr++); if (!(b & 0x80)) goto done; + } + + // We have overrun the maximum size of a varint (10 bytes). Assume + // the data is corrupt. + return NULL; + + done: + *value = result; + return ptr; +} + +} // namespace - uint32 part0 = (static_cast<uint32>(ptr[0]) ) | - (static_cast<uint32>(ptr[1]) << 8) | - (static_cast<uint32>(ptr[2]) << 16) | - (static_cast<uint32>(ptr[3]) << 24); - uint32 part1 = (static_cast<uint32>(ptr[4]) ) | - (static_cast<uint32>(ptr[5]) << 8) | - (static_cast<uint32>(ptr[6]) << 16) | - (static_cast<uint32>(ptr[7]) << 24); - *value = static_cast<uint64>(part0) | - (static_cast<uint64>(part1) << 32); +bool CodedInputStream::ReadVarint32Slow(uint32* value) { + uint64 result; + // Directly invoke ReadVarint64Fallback, since we already tried to optimize + // for one-byte varints. + if (!ReadVarint64Fallback(&result)) return false; + *value = (uint32)result; return true; } bool CodedInputStream::ReadVarint32Fallback(uint32* value) { - if (buffer_size_ >= kMaxVarintBytes || + if (BufferSize() >= kMaxVarintBytes || // Optimization: If the varint ends at exactly the end of the buffer, // we can detect that and still use the fast path. - (buffer_size_ != 0 && !(buffer_[buffer_size_-1] & 0x80))) { - // Fast path: We have enough bytes left in the buffer to guarantee that - // this read won't cross the end, so we can skip the checks. - const uint8* ptr = buffer_; - uint32 b; - uint32 result; - - b = *(ptr++); result = (b & 0x7F) ; if (!(b & 0x80)) goto done; - b = *(ptr++); result |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done; - b = *(ptr++); result |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done; - b = *(ptr++); result |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done; - b = *(ptr++); result |= b << 28; if (!(b & 0x80)) goto done; - - // If the input is larger than 32 bits, we still need to read it all - // and discard the high-order bits. - for (int i = 0; i < kMaxVarintBytes - kMaxVarint32Bytes; i++) { - b = *(ptr++); if (!(b & 0x80)) goto done; - } - - // We have overrun the maximum size of a varint (10 bytes). Assume - // the data is corrupt. - return false; - - done: - Advance(ptr - buffer_); - *value = result; + (buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) { + const uint8* end = ReadVarint32FromArray(buffer_, value); + if (end == NULL) return false; + buffer_ = end; return true; - } else { - // Optimization: If we're at a limit, detect that quickly. (This is - // common when reading tags.) - while (buffer_size_ == 0) { - // Detect cases where we definitely hit a byte limit without calling - // Refresh(). - if (// If we hit a limit, buffer_size_after_limit_ will be non-zero. - buffer_size_after_limit_ > 0 && - // Make sure that the limit we hit is not total_bytes_limit_, since - // in that case we still need to call Refresh() so that it prints an - // error. - total_bytes_read_ - buffer_size_after_limit_ < total_bytes_limit_) { - // We hit a byte limit. - legitimate_message_end_ = true; - return false; - } + // Really slow case: we will incur the cost of an extra function call here, + // but moving this out of line reduces the size of this function, which + // improves the common case. In micro benchmarks, this is worth about 10-15% + return ReadVarint32Slow(value); + } +} - // Call refresh. - if (!Refresh()) { - // Refresh failed. Make sure that it failed due to EOF, not because - // we hit total_bytes_limit_, which, unlike normal limits, is not a - // valid place to end a message. - int current_position = total_bytes_read_ - buffer_size_after_limit_; - if (current_position >= total_bytes_limit_) { - // Hit total_bytes_limit_. But if we also hit the normal limit, - // we're still OK. - legitimate_message_end_ = current_limit_ == total_bytes_limit_; - } else { - legitimate_message_end_ = true; - } - return false; +uint32 CodedInputStream::ReadTagSlow() { + if (buffer_ == buffer_end_) { + // Call refresh. + if (!Refresh()) { + // Refresh failed. Make sure that it failed due to EOF, not because + // we hit total_bytes_limit_, which, unlike normal limits, is not a + // valid place to end a message. + int current_position = total_bytes_read_ - buffer_size_after_limit_; + if (current_position >= total_bytes_limit_) { + // Hit total_bytes_limit_. But if we also hit the normal limit, + // we're still OK. + legitimate_message_end_ = current_limit_ == total_bytes_limit_; + } else { + legitimate_message_end_ = true; } + return 0; } + } - // Slow path: Just do a 64-bit read. - uint64 result; - if (!ReadVarint64(&result)) return false; - *value = (uint32)result; - return true; + // For the slow path, just do a 64-bit read. Try to optimize for one-byte tags + // again, since we have now refreshed the buffer. + uint64 result; + if (!ReadVarint64(&result)) return 0; + return static_cast<uint32>(result); +} + +uint32 CodedInputStream::ReadTagFallback() { + if (BufferSize() >= kMaxVarintBytes || + // Optimization: If the varint ends at exactly the end of the buffer, + // we can detect that and still use the fast path. + (buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) { + uint32 tag; + const uint8* end = ReadVarint32FromArray(buffer_, &tag); + if (end == NULL) { + return 0; + } + buffer_ = end; + return tag; + } else { + // We are commonly at a limit when attempting to read tags. Try to quickly + // detect this case without making another function call. + if (buffer_ == buffer_end_ && buffer_size_after_limit_ > 0 && + // Make sure that the limit we hit is not total_bytes_limit_, since + // in that case we still need to call Refresh() so that it prints an + // error. + total_bytes_read_ - buffer_size_after_limit_ < total_bytes_limit_) { + // We hit a byte limit. + legitimate_message_end_ = true; + return 0; + } + return ReadTagSlow(); } } -bool CodedInputStream::ReadVarint64(uint64* value) { - if (buffer_size_ >= kMaxVarintBytes || +bool CodedInputStream::ReadVarint64Slow(uint64* value) { + // Slow path: This read might cross the end of the buffer, so we + // need to check and refresh the buffer if and when it does. + + uint64 result = 0; + int count = 0; + uint32 b; + + do { + if (count == kMaxVarintBytes) return false; + while (buffer_ == buffer_end_) { + if (!Refresh()) return false; + } + b = *buffer_; + result |= static_cast<uint64>(b & 0x7F) << (7 * count); + Advance(1); + ++count; + } while (b & 0x80); + + *value = result; + return true; +} + +bool CodedInputStream::ReadVarint64Fallback(uint64* value) { + if (BufferSize() >= kMaxVarintBytes || // Optimization: If the varint ends at exactly the end of the buffer, // we can detect that and still use the fast path. - (buffer_size_ != 0 && !(buffer_[buffer_size_-1] & 0x80))) { + (buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) { // Fast path: We have enough bytes left in the buffer to guarantee that // this read won't cross the end, so we can skip the checks. @@ -442,7 +443,7 @@ bool CodedInputStream::ReadVarint64(uint64* value) { // We have overrun the maximum size of a varint (10 bytes). The data // must be corrupt. - return false; + return NULL; done: Advance(ptr - buffer_); @@ -450,33 +451,13 @@ bool CodedInputStream::ReadVarint64(uint64* value) { (static_cast<uint64>(part1) << 28) | (static_cast<uint64>(part2) << 56); return true; - } else { - // Slow path: This read might cross the end of the buffer, so we - // need to check and refresh the buffer if and when it does. - - uint64 result = 0; - int count = 0; - uint32 b; - - do { - if (count == kMaxVarintBytes) return false; - while (buffer_size_ == 0) { - if (!Refresh()) return false; - } - b = *buffer_; - result |= static_cast<uint64>(b & 0x7F) << (7 * count); - Advance(1); - ++count; - } while(b & 0x80); - - *value = result; - return true; + return ReadVarint64Slow(value); } } bool CodedInputStream::Refresh() { - GOOGLE_DCHECK_EQ(buffer_size_, 0); + GOOGLE_DCHECK_EQ(0, BufferSize()); if (buffer_size_after_limit_ > 0 || overflow_bytes_ > 0 || total_bytes_read_ == current_limit_) { @@ -507,25 +488,27 @@ bool CodedInputStream::Refresh() { } const void* void_buffer; - if (input_->Next(&void_buffer, &buffer_size_)) { + int buffer_size; + if (input_->Next(&void_buffer, &buffer_size)) { buffer_ = reinterpret_cast<const uint8*>(void_buffer); - GOOGLE_CHECK_GE(buffer_size_, 0); + buffer_end_ = buffer_ + buffer_size; + GOOGLE_CHECK_GE(buffer_size, 0); - if (total_bytes_read_ <= INT_MAX - buffer_size_) { - total_bytes_read_ += buffer_size_; + if (total_bytes_read_ <= INT_MAX - buffer_size) { + total_bytes_read_ += buffer_size; } else { - // Overflow. Reset buffer_size_ to not include the bytes beyond INT_MAX. + // Overflow. Reset buffer_end_ to not include the bytes beyond INT_MAX. // We can't get that far anyway, because total_bytes_limit_ is guaranteed // to be less than it. We need to keep track of the number of bytes // we discarded, though, so that we can call input_->BackUp() to back // up over them on destruction. // The following line is equivalent to: - // overflow_bytes_ = total_bytes_read_ + buffer_size_ - INT_MAX; + // overflow_bytes_ = total_bytes_read_ + buffer_size - INT_MAX; // except that it avoids overflows. Signed integer overflow has // undefined results according to the C standard. - overflow_bytes_ = total_bytes_read_ - (INT_MAX - buffer_size_); - buffer_size_ -= overflow_bytes_; + overflow_bytes_ = total_bytes_read_ - (INT_MAX - buffer_size); + buffer_end_ -= overflow_bytes_; total_bytes_read_ = INT_MAX; } @@ -533,7 +516,7 @@ bool CodedInputStream::Refresh() { return true; } else { buffer_ = NULL; - buffer_size_ = 0; + buffer_end_ = NULL; return false; } } |