diff options
Diffstat (limited to 'src/google/protobuf/io/coded_stream.cc')
-rw-r--r-- | src/google/protobuf/io/coded_stream.cc | 367 |
1 files changed, 115 insertions, 252 deletions
diff --git a/src/google/protobuf/io/coded_stream.cc b/src/google/protobuf/io/coded_stream.cc index e3a34d0a..0851ff0c 100644 --- a/src/google/protobuf/io/coded_stream.cc +++ b/src/google/protobuf/io/coded_stream.cc @@ -76,10 +76,6 @@ CodedInputStream::~CodedInputStream() { if (input_ != NULL) { BackUpInputToCurrentPosition(); } - - if (total_bytes_warning_threshold_ == -2) { - GOOGLE_LOG(WARNING) << "The total number of bytes read was " << total_bytes_read_; - } } // Static. @@ -105,7 +101,7 @@ void CodedInputStream::BackUpInputToCurrentPosition() { inline void CodedInputStream::RecomputeBufferLimits() { buffer_end_ += buffer_size_after_limit_; - int closest_limit = min(current_limit_, total_bytes_limit_); + int closest_limit = std::min(current_limit_, total_bytes_limit_); if (closest_limit < total_bytes_read_) { // The limit position is in the current buffer. We must adjust // the buffer size accordingly. @@ -123,21 +119,15 @@ CodedInputStream::Limit CodedInputStream::PushLimit(int byte_limit) { Limit old_limit = current_limit_; // security: byte_limit is possibly evil, so check for negative values - // and overflow. - if (byte_limit >= 0 && - byte_limit <= INT_MAX - current_position) { + // and overflow. Also check that the new requested limit is before the + // previous limit; otherwise we continue to enforce the previous limit. + if (GOOGLE_PREDICT_TRUE(byte_limit >= 0 && + byte_limit <= INT_MAX - current_position && + byte_limit < current_limit_ - current_position)) { current_limit_ = current_position + byte_limit; - } else { - // Negative or overflow. - current_limit_ = INT_MAX; + RecomputeBufferLimits(); } - // We need to enforce all limits, not just the new one, so if the previous - // limit was before the new requested limit, we continue to enforce the - // previous limit. - current_limit_ = min(current_limit_, old_limit); - - RecomputeBufferLimits(); return old_limit; } @@ -183,18 +173,11 @@ int CodedInputStream::BytesUntilLimit() const { return current_limit_ - current_position; } -void CodedInputStream::SetTotalBytesLimit( - int total_bytes_limit, int warning_threshold) { +void CodedInputStream::SetTotalBytesLimit(int total_bytes_limit) { // Make sure the limit isn't already past, since this could confuse other // code. int current_position = CurrentPosition(); - total_bytes_limit_ = max(current_position, total_bytes_limit); - if (warning_threshold >= 0) { - total_bytes_warning_threshold_ = warning_threshold; - } else { - // warning_threshold is negative - total_bytes_warning_threshold_ = -1; - } + total_bytes_limit_ = std::max(current_position, total_bytes_limit); RecomputeBufferLimits(); } @@ -211,17 +194,7 @@ void CodedInputStream::PrintTotalBytesLimitError() { "in google/protobuf/io/coded_stream.h."; } -bool CodedInputStream::Skip(int count) { - if (count < 0) return false; // security: count is often user-supplied - - const int original_buffer_size = BufferSize(); - - if (count <= original_buffer_size) { - // Just skipping within the current buffer. Easy. - Advance(count); - return true; - } - +bool CodedInputStream::SkipFallback(int count, int original_buffer_size) { if (buffer_size_after_limit_ > 0) { // We hit a limit inside this buffer. Advance to the limit and fail. Advance(original_buffer_size); @@ -233,7 +206,7 @@ bool CodedInputStream::Skip(int count) { buffer_end_ = buffer_; // Make sure this skip doesn't try to skip past the current limit. - int closest_limit = min(current_limit_, total_bytes_limit_); + int closest_limit = std::min(current_limit_, total_bytes_limit_); int bytes_until_limit = closest_limit - total_bytes_read_; if (bytes_until_limit < count) { // We hit the limit. Skip up to it then fail. @@ -244,8 +217,12 @@ bool CodedInputStream::Skip(int count) { return false; } + if (!input_->Skip(count)) { + total_bytes_read_ = input_->ByteCount(); + return false; + } total_bytes_read_ += count; - return input_->Skip(count); + return true; } bool CodedInputStream::GetDirectBufferPointer(const void** data, int* size) { @@ -270,7 +247,7 @@ bool CodedInputStream::ReadStringFallback(string* buffer, int size) { buffer->clear(); } - int closest_limit = min(current_limit_, total_bytes_limit_); + int closest_limit = std::min(current_limit_, total_bytes_limit_); if (closest_limit != INT_MAX) { int bytes_to_limit = closest_limit - CurrentPosition(); if (bytes_to_limit > 0 && size > 0 && size <= bytes_to_limit) { @@ -339,7 +316,8 @@ namespace { // The first part of the pair is true iff the read was successful. The second // part is buffer + (number of bytes read). This function is always inlined, // so returning a pair is costless. -GOOGLE_ATTRIBUTE_ALWAYS_INLINE ::std::pair<bool, const uint8*> ReadVarint32FromArray( +GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE +::std::pair<bool, const uint8*> ReadVarint32FromArray( uint32 first_byte, const uint8* buffer, uint32* value); inline ::std::pair<bool, const uint8*> ReadVarint32FromArray( @@ -376,6 +354,49 @@ inline ::std::pair<bool, const uint8*> ReadVarint32FromArray( return std::make_pair(true, ptr); } +GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE::std::pair<bool, const uint8*> +ReadVarint64FromArray(const uint8* buffer, uint64* value); +inline ::std::pair<bool, const uint8*> ReadVarint64FromArray( + const uint8* buffer, uint64* value) { + const uint8* ptr = buffer; + uint32 b; + + // Splitting into 32-bit pieces gives better performance on 32-bit + // processors. + uint32 part0 = 0, part1 = 0, part2 = 0; + + b = *(ptr++); part0 = b ; if (!(b & 0x80)) goto done; + part0 -= 0x80; + b = *(ptr++); part0 += b << 7; if (!(b & 0x80)) goto done; + part0 -= 0x80 << 7; + b = *(ptr++); part0 += b << 14; if (!(b & 0x80)) goto done; + part0 -= 0x80 << 14; + b = *(ptr++); part0 += b << 21; if (!(b & 0x80)) goto done; + part0 -= 0x80 << 21; + b = *(ptr++); part1 = b ; if (!(b & 0x80)) goto done; + part1 -= 0x80; + b = *(ptr++); part1 += b << 7; if (!(b & 0x80)) goto done; + part1 -= 0x80 << 7; + b = *(ptr++); part1 += b << 14; if (!(b & 0x80)) goto done; + part1 -= 0x80 << 14; + b = *(ptr++); part1 += b << 21; if (!(b & 0x80)) goto done; + part1 -= 0x80 << 21; + b = *(ptr++); part2 = b ; if (!(b & 0x80)) goto done; + part2 -= 0x80; + b = *(ptr++); part2 += b << 7; if (!(b & 0x80)) goto done; + // "part2 -= 0x80 << 7" is irrelevant because (0x80 << 7) << 56 is 0. + + // We have overrun the maximum size of a varint (10 bytes). Assume + // the data is corrupt. + return std::make_pair(false, ptr); + + done: + *value = (static_cast<uint64>(part0)) | + (static_cast<uint64>(part1) << 28) | + (static_cast<uint64>(part2) << 56); + return std::make_pair(true, ptr); +} + } // namespace bool CodedInputStream::ReadVarint32Slow(uint32* value) { @@ -408,6 +429,32 @@ int64 CodedInputStream::ReadVarint32Fallback(uint32 first_byte_or_zero) { } } +int CodedInputStream::ReadVarintSizeAsIntSlow() { + // Directly invoke ReadVarint64Fallback, since we already tried to optimize + // for one-byte varints. + std::pair<uint64, bool> p = ReadVarint64Fallback(); + if (!p.second || p.first > static_cast<uint64>(INT_MAX)) return -1; + return p.first; +} + +int CodedInputStream::ReadVarintSizeAsIntFallback() { + if (BufferSize() >= kMaxVarintBytes || + // Optimization: We're also safe if the buffer is non-empty and it ends + // with a byte that would terminate a varint. + (buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) { + uint64 temp; + ::std::pair<bool, const uint8*> p = ReadVarint64FromArray(buffer_, &temp); + if (!p.first || temp > static_cast<uint64>(INT_MAX)) return -1; + buffer_ = p.second; + return temp; + } else { + // Really slow case: we will incur the cost of an extra function call here, + // but moving this out of line reduces the size of this function, which + // improves the common case. In micro benchmarks, this is worth about 10-15% + return ReadVarintSizeAsIntSlow(); + } +} + uint32 CodedInputStream::ReadTagSlow() { if (buffer_ == buffer_end_) { // Call refresh. @@ -480,9 +527,15 @@ bool CodedInputStream::ReadVarint64Slow(uint64* value) { uint32 b; do { - if (count == kMaxVarintBytes) return false; + if (count == kMaxVarintBytes) { + *value = 0; + return false; + } while (buffer_ == buffer_end_) { - if (!Refresh()) return false; + if (!Refresh()) { + *value = 0; + return false; + } } b = *buffer_; result |= static_cast<uint64>(b & 0x7F) << (7 * count); @@ -499,47 +552,13 @@ std::pair<uint64, bool> CodedInputStream::ReadVarint64Fallback() { // Optimization: We're also safe if the buffer is non-empty and it ends // with a byte that would terminate a varint. (buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) { - // Fast path: We have enough bytes left in the buffer to guarantee that - // this read won't cross the end, so we can skip the checks. - - const uint8* ptr = buffer_; - uint32 b; - - // Splitting into 32-bit pieces gives better performance on 32-bit - // processors. - uint32 part0 = 0, part1 = 0, part2 = 0; - - b = *(ptr++); part0 = b ; if (!(b & 0x80)) goto done; - part0 -= 0x80; - b = *(ptr++); part0 += b << 7; if (!(b & 0x80)) goto done; - part0 -= 0x80 << 7; - b = *(ptr++); part0 += b << 14; if (!(b & 0x80)) goto done; - part0 -= 0x80 << 14; - b = *(ptr++); part0 += b << 21; if (!(b & 0x80)) goto done; - part0 -= 0x80 << 21; - b = *(ptr++); part1 = b ; if (!(b & 0x80)) goto done; - part1 -= 0x80; - b = *(ptr++); part1 += b << 7; if (!(b & 0x80)) goto done; - part1 -= 0x80 << 7; - b = *(ptr++); part1 += b << 14; if (!(b & 0x80)) goto done; - part1 -= 0x80 << 14; - b = *(ptr++); part1 += b << 21; if (!(b & 0x80)) goto done; - part1 -= 0x80 << 21; - b = *(ptr++); part2 = b ; if (!(b & 0x80)) goto done; - part2 -= 0x80; - b = *(ptr++); part2 += b << 7; if (!(b & 0x80)) goto done; - // "part2 -= 0x80 << 7" is irrelevant because (0x80 << 7) << 56 is 0. - - // We have overrun the maximum size of a varint (10 bytes). The data - // must be corrupt. - return std::make_pair(0, false); - - done: - Advance(ptr - buffer_); - return std::make_pair((static_cast<uint64>(part0)) | - (static_cast<uint64>(part1) << 28) | - (static_cast<uint64>(part2) << 56), - true); + uint64 temp; + ::std::pair<bool, const uint8*> p = ReadVarint64FromArray(buffer_, &temp); + if (!p.first) { + return std::make_pair(0, false); + } + buffer_ = p.second; + return std::make_pair(temp, true); } else { uint64 temp; bool success = ReadVarint64Slow(&temp); @@ -564,20 +583,6 @@ bool CodedInputStream::Refresh() { return false; } - if (total_bytes_warning_threshold_ >= 0 && - total_bytes_read_ >= total_bytes_warning_threshold_) { - GOOGLE_LOG(WARNING) << "Reading dangerously large protocol message. If the " - "message turns out to be larger than " - << total_bytes_limit_ << " bytes, parsing will be halted " - "for security reasons. To increase the limit (or to " - "disable these warnings), see " - "CodedInputStream::SetTotalBytesLimit() in " - "google/protobuf/io/coded_stream.h."; - - // Don't warn again for this stream, and print total size at the end. - total_bytes_warning_threshold_ = -2; - } - const void* void_buffer; int buffer_size; if (NextNonEmpty(input_, &void_buffer, &buffer_size)) { @@ -614,20 +619,11 @@ bool CodedInputStream::Refresh() { // CodedOutputStream ================================================= +std::atomic<bool> CodedOutputStream::default_serialization_deterministic_{ + false}; + CodedOutputStream::CodedOutputStream(ZeroCopyOutputStream* output) - : output_(output), - buffer_(NULL), - buffer_size_(0), - total_bytes_(0), - had_error_(false), - aliasing_enabled_(false) { - // Eagerly Refresh() so buffer space is immediately available. - Refresh(); - // The Refresh() may have failed. If the client doesn't write any data, - // though, don't consider this an error. If the client does write data, then - // another Refresh() will be attempted and it will set the error once again. - had_error_ = false; -} + : CodedOutputStream(output, true) {} CodedOutputStream::CodedOutputStream(ZeroCopyOutputStream* output, bool do_eager_refresh) @@ -636,7 +632,8 @@ CodedOutputStream::CodedOutputStream(ZeroCopyOutputStream* output, buffer_size_(0), total_bytes_(0), had_error_(false), - aliasing_enabled_(false) { + aliasing_enabled_(false), + is_serialization_deterministic_(IsDefaultSerializationDeterministic()) { if (do_eager_refresh) { // Eagerly Refresh() so buffer space is immediately available. Refresh(); @@ -749,104 +746,12 @@ void CodedOutputStream::WriteVarint32SlowPath(uint32 value) { WriteRaw(bytes, size); } -inline uint8* CodedOutputStream::WriteVarint64ToArrayInline( - uint64 value, uint8* target) { - // Splitting into 32-bit pieces gives better performance on 32-bit - // processors. - uint32 part0 = static_cast<uint32>(value ); - uint32 part1 = static_cast<uint32>(value >> 28); - uint32 part2 = static_cast<uint32>(value >> 56); - - int size; - - // Here we can't really optimize for small numbers, since the value is - // split into three parts. Cheking for numbers < 128, for instance, - // would require three comparisons, since you'd have to make sure part1 - // and part2 are zero. However, if the caller is using 64-bit integers, - // it is likely that they expect the numbers to often be very large, so - // we probably don't want to optimize for small numbers anyway. Thus, - // we end up with a hardcoded binary search tree... - if (part2 == 0) { - if (part1 == 0) { - if (part0 < (1 << 14)) { - if (part0 < (1 << 7)) { - size = 1; goto size1; - } else { - size = 2; goto size2; - } - } else { - if (part0 < (1 << 21)) { - size = 3; goto size3; - } else { - size = 4; goto size4; - } - } - } else { - if (part1 < (1 << 14)) { - if (part1 < (1 << 7)) { - size = 5; goto size5; - } else { - size = 6; goto size6; - } - } else { - if (part1 < (1 << 21)) { - size = 7; goto size7; - } else { - size = 8; goto size8; - } - } - } - } else { - if (part2 < (1 << 7)) { - size = 9; goto size9; - } else { - size = 10; goto size10; - } - } - - GOOGLE_LOG(FATAL) << "Can't get here."; - - size10: target[9] = static_cast<uint8>((part2 >> 7) | 0x80); - size9 : target[8] = static_cast<uint8>((part2 ) | 0x80); - size8 : target[7] = static_cast<uint8>((part1 >> 21) | 0x80); - size7 : target[6] = static_cast<uint8>((part1 >> 14) | 0x80); - size6 : target[5] = static_cast<uint8>((part1 >> 7) | 0x80); - size5 : target[4] = static_cast<uint8>((part1 ) | 0x80); - size4 : target[3] = static_cast<uint8>((part0 >> 21) | 0x80); - size3 : target[2] = static_cast<uint8>((part0 >> 14) | 0x80); - size2 : target[1] = static_cast<uint8>((part0 >> 7) | 0x80); - size1 : target[0] = static_cast<uint8>((part0 ) | 0x80); - - target[size-1] &= 0x7F; - return target + size; -} - -void CodedOutputStream::WriteVarint64(uint64 value) { - if (buffer_size_ >= kMaxVarintBytes) { - // Fast path: We have enough bytes left in the buffer to guarantee that - // this write won't cross the end, so we can skip the checks. - uint8* target = buffer_; - - uint8* end = WriteVarint64ToArrayInline(value, target); - int size = end - target; - Advance(size); - } else { - // Slow path: This write might cross the end of the buffer, so we - // compose the bytes first then use WriteRaw(). - uint8 bytes[kMaxVarintBytes]; - int size = 0; - while (value > 0x7F) { - bytes[size++] = (static_cast<uint8>(value) & 0x7F) | 0x80; - value >>= 7; - } - bytes[size++] = static_cast<uint8>(value) & 0x7F; - WriteRaw(bytes, size); - } -} - -uint8* CodedOutputStream::WriteVarint64ToArray( - uint64 value, uint8* target) { - return WriteVarint64ToArrayInline(value, target); +void CodedOutputStream::WriteVarint64SlowPath(uint64 value) { + uint8 bytes[kMaxVarintBytes]; + uint8* target = &bytes[0]; + uint8* end = WriteVarint64ToArray(value, target); + int size = end - target; + WriteRaw(bytes, size); } bool CodedOutputStream::Refresh() { @@ -863,48 +768,6 @@ bool CodedOutputStream::Refresh() { } } -int CodedOutputStream::VarintSize32Fallback(uint32 value) { - if (value < (1 << 7)) { - return 1; - } else if (value < (1 << 14)) { - return 2; - } else if (value < (1 << 21)) { - return 3; - } else if (value < (1 << 28)) { - return 4; - } else { - return 5; - } -} - -int CodedOutputStream::VarintSize64(uint64 value) { - if (value < (1ull << 35)) { - if (value < (1ull << 7)) { - return 1; - } else if (value < (1ull << 14)) { - return 2; - } else if (value < (1ull << 21)) { - return 3; - } else if (value < (1ull << 28)) { - return 4; - } else { - return 5; - } - } else { - if (value < (1ull << 42)) { - return 6; - } else if (value < (1ull << 49)) { - return 7; - } else if (value < (1ull << 56)) { - return 8; - } else if (value < (1ull << 63)) { - return 9; - } else { - return 10; - } - } -} - uint8* CodedOutputStream::WriteStringWithSizeToArray(const string& str, uint8* target) { GOOGLE_DCHECK_LE(str.size(), kuint32max); |