1 files changed, 115 insertions, 252 deletions
diff --git a/src/google/protobuf/io/coded_stream.cc b/src/google/protobuf/io/coded_stream.cc
index e3a34d0a..0851ff0c 100644
--- a/src/google/protobuf/io/coded_stream.cc
+++ b/src/google/protobuf/io/coded_stream.cc
@@ -76,10 +76,6 @@ CodedInputStream::~CodedInputStream() {
   if (input_ != NULL) {
     BackUpInputToCurrentPosition();
   }
-
-  if (total_bytes_warning_threshold_ == -2) {
-    GOOGLE_LOG(WARNING) << "The total number of bytes read was " << total_bytes_read_;
-  }
 }
 
 // Static.
@@ -105,7 +101,7 @@ void CodedInputStream::BackUpInputToCurrentPosition() {
 
 inline void CodedInputStream::RecomputeBufferLimits() {
   buffer_end_ += buffer_size_after_limit_;
-  int closest_limit = min(current_limit_, total_bytes_limit_);
+  int closest_limit = std::min(current_limit_, total_bytes_limit_);
   if (closest_limit < total_bytes_read_) {
     // The limit position is in the current buffer.  We must adjust
     // the buffer size accordingly.
@@ -123,21 +119,15 @@ CodedInputStream::Limit CodedInputStream::PushLimit(int byte_limit) {
   Limit old_limit = current_limit_;
 
   // security: byte_limit is possibly evil, so check for negative values
-  // and overflow.
-  if (byte_limit >= 0 &&
-      byte_limit <= INT_MAX - current_position) {
+  // and overflow. Also check that the new requested limit is before the
+  // previous limit; otherwise we continue to enforce the previous limit.
+  if (GOOGLE_PREDICT_TRUE(byte_limit >= 0 &&
+                        byte_limit <= INT_MAX - current_position &&
+                        byte_limit < current_limit_ - current_position)) {
     current_limit_ = current_position + byte_limit;
-  } else {
-    // Negative or overflow.
-    current_limit_ = INT_MAX;
+    RecomputeBufferLimits();
   }
 
-  // We need to enforce all limits, not just the new one, so if the previous
-  // limit was before the new requested limit, we continue to enforce the
-  // previous limit.
-  current_limit_ = min(current_limit_, old_limit);
-
-  RecomputeBufferLimits();
   return old_limit;
 }
 
@@ -183,18 +173,11 @@ int CodedInputStream::BytesUntilLimit() const {
   return current_limit_ - current_position;
 }
 
-void CodedInputStream::SetTotalBytesLimit(
-    int total_bytes_limit, int warning_threshold) {
+void CodedInputStream::SetTotalBytesLimit(int total_bytes_limit) {
   // Make sure the limit isn't already past, since this could confuse other
   // code.
   int current_position = CurrentPosition();
-  total_bytes_limit_ = max(current_position, total_bytes_limit);
-  if (warning_threshold >= 0) {
-    total_bytes_warning_threshold_ = warning_threshold;
-  } else {
-    // warning_threshold is negative
-    total_bytes_warning_threshold_ = -1;
-  }
+  total_bytes_limit_ = std::max(current_position, total_bytes_limit);
   RecomputeBufferLimits();
 }
 
@@ -211,17 +194,7 @@ void CodedInputStream::PrintTotalBytesLimitError() {
                 "in google/protobuf/io/coded_stream.h.";
 }
 
-bool CodedInputStream::Skip(int count) {
-  if (count < 0) return false;  // security: count is often user-supplied
-
-  const int original_buffer_size = BufferSize();
-
-  if (count <= original_buffer_size) {
-    // Just skipping within the current buffer.  Easy.
-    Advance(count);
-    return true;
-  }
-
+bool CodedInputStream::SkipFallback(int count, int original_buffer_size) {
   if (buffer_size_after_limit_ > 0) {
     // We hit a limit inside this buffer.  Advance to the limit and fail.
     Advance(original_buffer_size);
@@ -233,7 +206,7 @@ bool CodedInputStream::Skip(int count) {
   buffer_end_ = buffer_;
 
   // Make sure this skip doesn't try to skip past the current limit.
-  int closest_limit = min(current_limit_, total_bytes_limit_);
+  int closest_limit = std::min(current_limit_, total_bytes_limit_);
   int bytes_until_limit = closest_limit - total_bytes_read_;
   if (bytes_until_limit < count) {
     // We hit the limit.  Skip up to it then fail.
@@ -244,8 +217,12 @@ bool CodedInputStream::Skip(int count) {
     return false;
   }
 
+  if (!input_->Skip(count)) {
+    total_bytes_read_ = input_->ByteCount();
+    return false;
+  }
   total_bytes_read_ += count;
-  return input_->Skip(count);
+  return true;
 }
 
 bool CodedInputStream::GetDirectBufferPointer(const void** data, int* size) {
@@ -270,7 +247,7 @@ bool CodedInputStream::ReadStringFallback(string* buffer, int size) {
     buffer->clear();
   }
 
-  int closest_limit = min(current_limit_, total_bytes_limit_);
+  int closest_limit = std::min(current_limit_, total_bytes_limit_);
   if (closest_limit != INT_MAX) {
     int bytes_to_limit = closest_limit - CurrentPosition();
     if (bytes_to_limit > 0 && size > 0 && size <= bytes_to_limit) {
@@ -339,7 +316,8 @@ namespace {
 // The first part of the pair is true iff the read was successful.  The second
 // part is buffer + (number of bytes read).  This function is always inlined,
 // so returning a pair is costless.
-GOOGLE_ATTRIBUTE_ALWAYS_INLINE ::std::pair<bool, const uint8*> ReadVarint32FromArray(
+GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE
+::std::pair<bool, const uint8*> ReadVarint32FromArray(
     uint32 first_byte, const uint8* buffer,
     uint32* value);
 inline ::std::pair<bool, const uint8*> ReadVarint32FromArray(
@@ -376,6 +354,49 @@ inline ::std::pair<bool, const uint8*> ReadVarint32FromArray(
   return std::make_pair(true, ptr);
 }
 
+GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE::std::pair<bool, const uint8*>
+ReadVarint64FromArray(const uint8* buffer, uint64* value);
+inline ::std::pair<bool, const uint8*> ReadVarint64FromArray(
+    const uint8* buffer, uint64* value) {
+  const uint8* ptr = buffer;
+  uint32 b;
+
+  // Splitting into 32-bit pieces gives better performance on 32-bit
+  // processors.
+  uint32 part0 = 0, part1 = 0, part2 = 0;
+
+  b = *(ptr++); part0  = b      ; if (!(b & 0x80)) goto done;
+  part0 -= 0x80;
+  b = *(ptr++); part0 += b <<  7; if (!(b & 0x80)) goto done;
+  part0 -= 0x80 << 7;
+  b = *(ptr++); part0 += b << 14; if (!(b & 0x80)) goto done;
+  part0 -= 0x80 << 14;
+  b = *(ptr++); part0 += b << 21; if (!(b & 0x80)) goto done;
+  part0 -= 0x80 << 21;
+  b = *(ptr++); part1  = b      ; if (!(b & 0x80)) goto done;
+  part1 -= 0x80;
+  b = *(ptr++); part1 += b <<  7; if (!(b & 0x80)) goto done;
+  part1 -= 0x80 << 7;
+  b = *(ptr++); part1 += b << 14; if (!(b & 0x80)) goto done;
+  part1 -= 0x80 << 14;
+  b = *(ptr++); part1 += b << 21; if (!(b & 0x80)) goto done;
+  part1 -= 0x80 << 21;
+  b = *(ptr++); part2  = b      ; if (!(b & 0x80)) goto done;
+  part2 -= 0x80;
+  b = *(ptr++); part2 += b <<  7; if (!(b & 0x80)) goto done;
+  // "part2 -= 0x80 << 7" is irrelevant because (0x80 << 7) << 56 is 0.
+
+  // We have overrun the maximum size of a varint (10 bytes).  Assume
+  // the data is corrupt.
+  return std::make_pair(false, ptr);
+
+ done:
+  *value = (static_cast<uint64>(part0)) |
+           (static_cast<uint64>(part1) << 28) |
+           (static_cast<uint64>(part2) << 56);
+  return std::make_pair(true, ptr);
+}
+
 }  // namespace
 
 bool CodedInputStream::ReadVarint32Slow(uint32* value) {
@@ -408,6 +429,32 @@ int64 CodedInputStream::ReadVarint32Fallback(uint32 first_byte_or_zero) {
   }
 }
 
+int CodedInputStream::ReadVarintSizeAsIntSlow() {
+  // Directly invoke ReadVarint64Fallback, since we already tried to optimize
+  // for one-byte varints.
+  std::pair<uint64, bool> p = ReadVarint64Fallback();
+  if (!p.second || p.first > static_cast<uint64>(INT_MAX)) return -1;
+  return p.first;
+}
+
+int CodedInputStream::ReadVarintSizeAsIntFallback() {
+  if (BufferSize() >= kMaxVarintBytes ||
+      // Optimization:  We're also safe if the buffer is non-empty and it ends
+      // with a byte that would terminate a varint.
+      (buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) {
+    uint64 temp;
+    ::std::pair<bool, const uint8*> p = ReadVarint64FromArray(buffer_, &temp);
+    if (!p.first || temp > static_cast<uint64>(INT_MAX)) return -1;
+    buffer_ = p.second;
+    return temp;
+  } else {
+    // Really slow case: we will incur the cost of an extra function call here,
+    // but moving this out of line reduces the size of this function, which
+    // improves the common case. In microbenchmarks, this is worth about 10-15%.
+    return ReadVarintSizeAsIntSlow();
+  }
+}
+
 uint32 CodedInputStream::ReadTagSlow() {
   if (buffer_ == buffer_end_) {
     // Call refresh.
@@ -480,9 +527,15 @@ bool CodedInputStream::ReadVarint64Slow(uint64* value) {
   uint32 b;
 
   do {
-    if (count == kMaxVarintBytes) return false;
+    if (count == kMaxVarintBytes) {
+      *value = 0;
+      return false;
+    }
     while (buffer_ == buffer_end_) {
-      if (!Refresh()) return false;
+      if (!Refresh()) {
+        *value = 0;
+        return false;
+      }
     }
     b = *buffer_;
     result |= static_cast<uint64>(b & 0x7F) << (7 * count);
@@ -499,47 +552,13 @@ std::pair<uint64, bool> CodedInputStream::ReadVarint64Fallback() {
       // Optimization:  We're also safe if the buffer is non-empty and it ends
       // with a byte that would terminate a varint.
       (buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) {
-    // Fast path:  We have enough bytes left in the buffer to guarantee that
-    // this read won't cross the end, so we can skip the checks.
-
-    const uint8* ptr = buffer_;
-    uint32 b;
-
-    // Splitting into 32-bit pieces gives better performance on 32-bit
-    // processors.
-    uint32 part0 = 0, part1 = 0, part2 = 0;
-
-    b = *(ptr++); part0  = b      ; if (!(b & 0x80)) goto done;
-    part0 -= 0x80;
-    b = *(ptr++); part0 += b <<  7; if (!(b & 0x80)) goto done;
-    part0 -= 0x80 << 7;
-    b = *(ptr++); part0 += b << 14; if (!(b & 0x80)) goto done;
-    part0 -= 0x80 << 14;
-    b = *(ptr++); part0 += b << 21; if (!(b & 0x80)) goto done;
-    part0 -= 0x80 << 21;
-    b = *(ptr++); part1  = b      ; if (!(b & 0x80)) goto done;
-    part1 -= 0x80;
-    b = *(ptr++); part1 += b <<  7; if (!(b & 0x80)) goto done;
-    part1 -= 0x80 << 7;
-    b = *(ptr++); part1 += b << 14; if (!(b & 0x80)) goto done;
-    part1 -= 0x80 << 14;
-    b = *(ptr++); part1 += b << 21; if (!(b & 0x80)) goto done;
-    part1 -= 0x80 << 21;
-    b = *(ptr++); part2  = b      ; if (!(b & 0x80)) goto done;
-    part2 -= 0x80;
-    b = *(ptr++); part2 += b <<  7; if (!(b & 0x80)) goto done;
-    // "part2 -= 0x80 << 7" is irrelevant because (0x80 << 7) << 56 is 0.
-
-    // We have overrun the maximum size of a varint (10 bytes).  The data
-    // must be corrupt.
-    return std::make_pair(0, false);
-
-   done:
-    Advance(ptr - buffer_);
-    return std::make_pair((static_cast<uint64>(part0)) |
-                              (static_cast<uint64>(part1) << 28) |
-                              (static_cast<uint64>(part2) << 56),
-                          true);
+    uint64 temp;
+    ::std::pair<bool, const uint8*> p = ReadVarint64FromArray(buffer_, &temp);
+    if (!p.first) {
+      return std::make_pair(0, false);
+    }
+    buffer_ = p.second;
+    return std::make_pair(temp, true);
   } else {
     uint64 temp;
     bool success = ReadVarint64Slow(&temp);
@@ -564,20 +583,6 @@ bool CodedInputStream::Refresh() {
     return false;
   }
 
-  if (total_bytes_warning_threshold_ >= 0 &&
-      total_bytes_read_ >= total_bytes_warning_threshold_) {
-      GOOGLE_LOG(WARNING) << "Reading dangerously large protocol message.  If the "
-                      "message turns out to be larger than "
-                   << total_bytes_limit_ << " bytes, parsing will be halted "
-                      "for security reasons.  To increase the limit (or to "
-                      "disable these warnings), see "
-                      "CodedInputStream::SetTotalBytesLimit() in "
-                      "google/protobuf/io/coded_stream.h.";
-
-    // Don't warn again for this stream, and print total size at the end.
-    total_bytes_warning_threshold_ = -2;
-  }
-
   const void* void_buffer;
   int buffer_size;
   if (NextNonEmpty(input_, &void_buffer, &buffer_size)) {
@@ -614,20 +619,11 @@ bool CodedInputStream::Refresh() {
 
 // CodedOutputStream =================================================
 
+std::atomic<bool> CodedOutputStream::default_serialization_deterministic_{
+    false};
+
 CodedOutputStream::CodedOutputStream(ZeroCopyOutputStream* output)
-  : output_(output),
-    buffer_(NULL),
-    buffer_size_(0),
-    total_bytes_(0),
-    had_error_(false),
-    aliasing_enabled_(false) {
-  // Eagerly Refresh() so buffer space is immediately available.
-  Refresh();
-  // The Refresh() may have failed. If the client doesn't write any data,
-  // though, don't consider this an error. If the client does write data, then
-  // another Refresh() will be attempted and it will set the error once again.
-  had_error_ = false;
-}
+    : CodedOutputStream(output, true) {}
 
 CodedOutputStream::CodedOutputStream(ZeroCopyOutputStream* output,
                                      bool do_eager_refresh)
@@ -636,7 +632,8 @@ CodedOutputStream::CodedOutputStream(ZeroCopyOutputStream* output,
     buffer_size_(0),
     total_bytes_(0),
     had_error_(false),
-    aliasing_enabled_(false) {
+    aliasing_enabled_(false),
+    is_serialization_deterministic_(IsDefaultSerializationDeterministic()) {
   if (do_eager_refresh) {
     // Eagerly Refresh() so buffer space is immediately available.
     Refresh();
@@ -749,104 +746,12 @@ void CodedOutputStream::WriteVarint32SlowPath(uint32 value) {
   WriteRaw(bytes, size);
 }
 
-inline uint8* CodedOutputStream::WriteVarint64ToArrayInline(
-    uint64 value, uint8* target) {
-  // Splitting into 32-bit pieces gives better performance on 32-bit
-  // processors.
-  uint32 part0 = static_cast<uint32>(value      );
-  uint32 part1 = static_cast<uint32>(value >> 28);
-  uint32 part2 = static_cast<uint32>(value >> 56);
-
-  int size;
-
-  // Here we can't really optimize for small numbers, since the value is
-  // split into three parts.  Cheking for numbers < 128, for instance,
-  // would require three comparisons, since you'd have to make sure part1
-  // and part2 are zero.  However, if the caller is using 64-bit integers,
-  // it is likely that they expect the numbers to often be very large, so
-  // we probably don't want to optimize for small numbers anyway.  Thus,
-  // we end up with a hardcoded binary search tree...
-  if (part2 == 0) {
-    if (part1 == 0) {
-      if (part0 < (1 << 14)) {
-        if (part0 < (1 << 7)) {
-          size = 1; goto size1;
-        } else {
-          size = 2; goto size2;
-        }
-      } else {
-        if (part0 < (1 << 21)) {
-          size = 3; goto size3;
-        } else {
-          size = 4; goto size4;
-        }
-      }
-    } else {
-      if (part1 < (1 << 14)) {
-        if (part1 < (1 << 7)) {
-          size = 5; goto size5;
-        } else {
-          size = 6; goto size6;
-        }
-      } else {
-        if (part1 < (1 << 21)) {
-          size = 7; goto size7;
-        } else {
-          size = 8; goto size8;
-        }
-      }
-    }
-  } else {
-    if (part2 < (1 << 7)) {
-      size = 9; goto size9;
-    } else {
-      size = 10; goto size10;
-    }
-  }
-
-  GOOGLE_LOG(FATAL) << "Can't get here.";
-
-  size10: target[9] = static_cast<uint8>((part2 >>  7) | 0x80);
-  size9 : target[8] = static_cast<uint8>((part2      ) | 0x80);
-  size8 : target[7] = static_cast<uint8>((part1 >> 21) | 0x80);
-  size7 : target[6] = static_cast<uint8>((part1 >> 14) | 0x80);
-  size6 : target[5] = static_cast<uint8>((part1 >>  7) | 0x80);
-  size5 : target[4] = static_cast<uint8>((part1      ) | 0x80);
-  size4 : target[3] = static_cast<uint8>((part0 >> 21) | 0x80);
-  size3 : target[2] = static_cast<uint8>((part0 >> 14) | 0x80);
-  size2 : target[1] = static_cast<uint8>((part0 >>  7) | 0x80);
-  size1 : target[0] = static_cast<uint8>((part0      ) | 0x80);
-
-  target[size-1] &= 0x7F;
-  return target + size;
-}
-
-void CodedOutputStream::WriteVarint64(uint64 value) {
-  if (buffer_size_ >= kMaxVarintBytes) {
-    // Fast path:  We have enough bytes left in the buffer to guarantee that
-    // this write won't cross the end, so we can skip the checks.
-    uint8* target = buffer_;
-
-    uint8* end = WriteVarint64ToArrayInline(value, target);
-    int size = end - target;
-    Advance(size);
-  } else {
-    // Slow path:  This write might cross the end of the buffer, so we
-    // compose the bytes first then use WriteRaw().
-    uint8 bytes[kMaxVarintBytes];
-    int size = 0;
-    while (value > 0x7F) {
-      bytes[size++] = (static_cast<uint8>(value) & 0x7F) | 0x80;
-      value >>= 7;
-    }
-    bytes[size++] = static_cast<uint8>(value) & 0x7F;
-    WriteRaw(bytes, size);
-  }
-}
-
-uint8* CodedOutputStream::WriteVarint64ToArray(
-    uint64 value, uint8* target) {
-  return WriteVarint64ToArrayInline(value, target);
+void CodedOutputStream::WriteVarint64SlowPath(uint64 value) {
+  uint8 bytes[kMaxVarintBytes];
+  uint8* target = &bytes[0];
+  uint8* end = WriteVarint64ToArray(value, target);
+  int size = end - target;
+  WriteRaw(bytes, size);
 }
 
 bool CodedOutputStream::Refresh() {
@@ -863,48 +768,6 @@ bool CodedOutputStream::Refresh() {
   }
 }
 
-int CodedOutputStream::VarintSize32Fallback(uint32 value) {
-  if (value < (1 << 7)) {
-    return 1;
-  } else if (value < (1 << 14)) {
-    return 2;
-  } else if (value < (1 << 21)) {
-    return 3;
-  } else if (value < (1 << 28)) {
-    return 4;
-  } else {
-    return 5;
-  }
-}
-
-int CodedOutputStream::VarintSize64(uint64 value) {
-  if (value < (1ull << 35)) {
-    if (value < (1ull << 7)) {
-      return 1;
-    } else if (value < (1ull << 14)) {
-      return 2;
-    } else if (value < (1ull << 21)) {
-      return 3;
-    } else if (value < (1ull << 28)) {
-      return 4;
-    } else {
-      return 5;
-    }
-  } else {
-    if (value < (1ull << 42)) {
-      return 6;
-    } else if (value < (1ull << 49)) {
-      return 7;
-    } else if (value < (1ull << 56)) {
-      return 8;
-    } else if (value < (1ull << 63)) {
-      return 9;
-    } else {
-      return 10;
-    }
-  }
-}
-
 uint8* CodedOutputStream::WriteStringWithSizeToArray(const string& str,
                                                      uint8* target) {
   GOOGLE_DCHECK_LE(str.size(), kuint32max);