aboutsummaryrefslogtreecommitdiff
path: root/src/google/protobuf/io
diff options
context:
space:
mode:
Diffstat (limited to 'src/google/protobuf/io')
-rw-r--r--src/google/protobuf/io/coded_stream.cc757
-rw-r--r--src/google/protobuf/io/coded_stream.h592
-rw-r--r--src/google/protobuf/io/coded_stream_unittest.cc929
-rw-r--r--src/google/protobuf/io/package_info.h40
-rw-r--r--src/google/protobuf/io/printer.cc165
-rw-r--r--src/google/protobuf/io/printer.h109
-rw-r--r--src/google/protobuf/io/printer_unittest.cc210
-rw-r--r--src/google/protobuf/io/tokenizer.cc679
-rw-r--r--src/google/protobuf/io/tokenizer.h276
-rw-r--r--src/google/protobuf/io/tokenizer_unittest.cc706
-rw-r--r--src/google/protobuf/io/zero_copy_stream.cc34
-rw-r--r--src/google/protobuf/io/zero_copy_stream.h224
-rw-r--r--src/google/protobuf/io/zero_copy_stream_impl.cc793
-rw-r--r--src/google/protobuf/io/zero_copy_stream_impl.h617
-rw-r--r--src/google/protobuf/io/zero_copy_stream_unittest.cc443
15 files changed, 6574 insertions, 0 deletions
diff --git a/src/google/protobuf/io/coded_stream.cc b/src/google/protobuf/io/coded_stream.cc
new file mode 100644
index 00000000..58c44dc1
--- /dev/null
+++ b/src/google/protobuf/io/coded_stream.cc
@@ -0,0 +1,757 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.
+// http://code.google.com/p/protobuf/
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+//
+// This implementation is heavily optimized to make reads and writes
+// of small values (especially varints) as fast as possible. In
+// particular, we optimize for the common case that a read or a write
+// will not cross the end of the buffer, since we can avoid a lot
+// of branching in this case.
+
+#include <stack>
+#include <limits.h>
+#include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/io/zero_copy_stream.h>
+#include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/stl_util-inl.h>
+
+
+namespace google {
+namespace protobuf {
+namespace io {
+
+namespace {
+
+static const int kDefaultTotalBytesLimit = 64 << 20; // 64MB
+
+static const int kDefaultTotalBytesWarningThreshold = 32 << 20; // 32MB
+static const int kDefaultRecursionLimit = 64;
+
+static const int kMaxVarintBytes = 10;
+static const int kMaxVarint32Bytes = 5;
+
+
+} // namespace
+
+// CodedInputStream ==================================================
+
+CodedInputStream::CodedInputStream(ZeroCopyInputStream* input)
+ : input_(input),
+ buffer_(NULL),
+ buffer_size_(0),
+ total_bytes_read_(0),
+ overflow_bytes_(0),
+
+ last_tag_(0),
+ legitimate_message_end_(false),
+
+ aliasing_enabled_(false),
+
+ current_limit_(INT_MAX),
+ buffer_size_after_limit_(0),
+
+ total_bytes_limit_(kDefaultTotalBytesLimit),
+ total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold),
+
+ recursion_depth_(0),
+ recursion_limit_(kDefaultRecursionLimit) {
+}
+
+CodedInputStream::~CodedInputStream() {
+ int backup_bytes = buffer_size_ + buffer_size_after_limit_ + overflow_bytes_;
+ if (backup_bytes > 0) {
+ // We still have bytes left over from the last buffer. Back up over
+ // them.
+ input_->BackUp(backup_bytes);
+ }
+}
+
+
+inline void CodedInputStream::RecomputeBufferLimits() {
+ buffer_size_ += buffer_size_after_limit_;
+ int closest_limit = min(current_limit_, total_bytes_limit_);
+ if (closest_limit < total_bytes_read_) {
+ // The limit position is in the current buffer. We must adjust
+ // the buffer size accordingly.
+ buffer_size_after_limit_ = total_bytes_read_ - closest_limit;
+ buffer_size_ -= buffer_size_after_limit_;
+ } else {
+ buffer_size_after_limit_ = 0;
+ }
+}
+
+CodedInputStream::Limit CodedInputStream::PushLimit(int byte_limit) {
+ // Current position relative to the beginning of the stream.
+ int current_position = total_bytes_read_ -
+ (buffer_size_ + buffer_size_after_limit_);
+
+ Limit old_limit = current_limit_;
+
+ // security: byte_limit is possibly evil, so check for negative values
+ // and overflow.
+ if (byte_limit >= 0 &&
+ byte_limit <= INT_MAX - current_position) {
+ current_limit_ = current_position + byte_limit;
+ } else {
+ // Negative or overflow.
+ current_limit_ = INT_MAX;
+ }
+
+ // We need to enforce all limits, not just the new one, so if the previous
+ // limit was before the new requested limit, we continue to enforce the
+ // previous limit.
+ current_limit_ = min(current_limit_, old_limit);
+
+ RecomputeBufferLimits();
+ return old_limit;
+}
+
+void CodedInputStream::PopLimit(Limit limit) {
+ // The limit passed in is actually the *old* limit, which we returned from
+ // PushLimit().
+ current_limit_ = limit;
+ RecomputeBufferLimits();
+
+ // We may no longer be at a legitimate message end. ReadTag() needs to be
+ // called again to find out.
+ legitimate_message_end_ = false;
+}
+
+int CodedInputStream::BytesUntilLimit() {
+ if (current_limit_ == INT_MAX) return -1;
+ int current_position = total_bytes_read_ -
+ (buffer_size_ + buffer_size_after_limit_);
+
+ return current_limit_ - current_position;
+}
+
+void CodedInputStream::SetTotalBytesLimit(
+ int total_bytes_limit, int warning_threshold) {
+ // Make sure the limit isn't already past, since this could confuse other
+ // code.
+ int current_position = total_bytes_read_ -
+ (buffer_size_ + buffer_size_after_limit_);
+ total_bytes_limit_ = max(current_position, total_bytes_limit);
+ total_bytes_warning_threshold_ = warning_threshold;
+ RecomputeBufferLimits();
+}
+
+void CodedInputStream::PrintTotalBytesLimitError() {
+ GOOGLE_LOG(ERROR) << "A protocol message was rejected because it was too "
+ "big (more than " << total_bytes_limit_
+ << " bytes). To increase the limit (or to disable these "
+ "warnings), see CodedInputStream::SetTotalBytesLimit() "
+ "in google/protobuf/io/coded_stream.h.";
+}
+
+bool CodedInputStream::Skip(int count) {
+ if (count < 0) return false; // security: count is often user-supplied
+
+ if (count <= buffer_size_) {
+ // Just skipping within the current buffer. Easy.
+ Advance(count);
+ return true;
+ }
+
+ if (buffer_size_after_limit_ > 0) {
+ // We hit a limit inside this buffer. Advance to the limit and fail.
+ Advance(buffer_size_);
+ return false;
+ }
+
+ count -= buffer_size_;
+ buffer_ = NULL;
+ buffer_size_ = 0;
+
+ // Make sure this skip doesn't try to skip past the current limit.
+ int closest_limit = min(current_limit_, total_bytes_limit_);
+ int bytes_until_limit = closest_limit - total_bytes_read_;
+ if (bytes_until_limit < count) {
+ // We hit the limit. Skip up to it then fail.
+ total_bytes_read_ = closest_limit;
+ input_->Skip(bytes_until_limit);
+ return false;
+ }
+
+ total_bytes_read_ += count;
+ return input_->Skip(count);
+}
+
+bool CodedInputStream::ReadRaw(void* buffer, int size) {
+ while (buffer_size_ < size) {
+ // Reading past end of buffer. Copy what we have, then refresh.
+ memcpy(buffer, buffer_, buffer_size_);
+ buffer = reinterpret_cast<uint8*>(buffer) + buffer_size_;
+ size -= buffer_size_;
+ if (!Refresh()) return false;
+ }
+
+ memcpy(buffer, buffer_, size);
+ Advance(size);
+
+ return true;
+}
+
+bool CodedInputStream::ReadString(string* buffer, int size) {
+ if (size < 0) return false; // security: size is often user-supplied
+
+ if (!buffer->empty()) {
+ buffer->clear();
+ }
+
+ if (size < buffer_size_) {
+ STLStringResizeUninitialized(buffer, size);
+ memcpy((uint8*)buffer->data(), buffer_, size);
+ Advance(size);
+ return true;
+ }
+
+ while (buffer_size_ < size) {
+ // Some STL implementations "helpfully" crash on buffer->append(NULL, 0).
+ if (buffer_size_ != 0) {
+ // Note: string1.append(string2) is O(string2.size()) (as opposed to
+ // O(string1.size() + string2.size()), which would be bad).
+ buffer->append(reinterpret_cast<const char*>(buffer_), buffer_size_);
+ }
+ size -= buffer_size_;
+ if (!Refresh()) return false;
+ }
+
+ buffer->append(reinterpret_cast<const char*>(buffer_), size);
+ Advance(size);
+
+ return true;
+}
+
+
+bool CodedInputStream::ReadLittleEndian32(uint32* value) {
+ uint8 bytes[sizeof(*value)];
+
+ const uint8* ptr;
+ if (buffer_size_ >= sizeof(*value)) {
+ // Fast path: Enough bytes in the buffer to read directly.
+ ptr = buffer_;
+ Advance(sizeof(*value));
+ } else {
+ // Slow path: Had to read past the end of the buffer.
+ if (!ReadRaw(bytes, sizeof(*value))) return false;
+ ptr = bytes;
+ }
+
+ *value = (static_cast<uint32>(ptr[0]) ) |
+ (static_cast<uint32>(ptr[1]) << 8) |
+ (static_cast<uint32>(ptr[2]) << 16) |
+ (static_cast<uint32>(ptr[3]) << 24);
+ return true;
+}
+
+bool CodedInputStream::ReadLittleEndian64(uint64* value) {
+ uint8 bytes[sizeof(*value)];
+
+ const uint8* ptr;
+ if (buffer_size_ >= sizeof(*value)) {
+ // Fast path: Enough bytes in the buffer to read directly.
+ ptr = buffer_;
+ Advance(sizeof(*value));
+ } else {
+ // Slow path: Had to read past the end of the buffer.
+ if (!ReadRaw(bytes, sizeof(*value))) return false;
+ ptr = bytes;
+ }
+
+ uint32 part0 = (static_cast<uint32>(ptr[0]) ) |
+ (static_cast<uint32>(ptr[1]) << 8) |
+ (static_cast<uint32>(ptr[2]) << 16) |
+ (static_cast<uint32>(ptr[3]) << 24);
+ uint32 part1 = (static_cast<uint32>(ptr[4]) ) |
+ (static_cast<uint32>(ptr[5]) << 8) |
+ (static_cast<uint32>(ptr[6]) << 16) |
+ (static_cast<uint32>(ptr[7]) << 24);
+ *value = static_cast<uint64>(part0) |
+ (static_cast<uint64>(part1) << 32);
+ return true;
+}
+
+bool CodedInputStream::ReadVarint32Fallback(uint32* value) {
+ if (buffer_size_ >= kMaxVarintBytes ||
+ // Optimization: If the varint ends at exactly the end of the buffer,
+ // we can detect that and still use the fast path.
+ (buffer_size_ != 0 && !(buffer_[buffer_size_-1] & 0x80))) {
+ // Fast path: We have enough bytes left in the buffer to guarantee that
+ // this read won't cross the end, so we can skip the checks.
+ const uint8* ptr = buffer_;
+ uint32 b;
+ uint32 result;
+
+ b = *(ptr++); result = (b & 0x7F) ; if (!(b & 0x80)) goto done;
+ b = *(ptr++); result |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done;
+ b = *(ptr++); result |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done;
+ b = *(ptr++); result |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done;
+ b = *(ptr++); result |= b << 28; if (!(b & 0x80)) goto done;
+
+ // If the input is larger than 32 bits, we still need to read it all
+ // and discard the high-order bits.
+ for (int i = 0; i < kMaxVarintBytes - kMaxVarint32Bytes; i++) {
+ b = *(ptr++); if (!(b & 0x80)) goto done;
+ }
+
+ // We have overrun the maximum size of a varint (10 bytes). Assume
+ // the data is corrupt.
+ return false;
+
+ done:
+ Advance(ptr - buffer_);
+ *value = result;
+ return true;
+
+ } else {
+ // Optimization: If we're at a limit, detect that quickly. (This is
+ // common when reading tags.)
+ while (buffer_size_ == 0) {
+ // Detect cases where we definitely hit a byte limit without calling
+ // Refresh().
+ if (// If we hit a limit, buffer_size_after_limit_ will be non-zero.
+ buffer_size_after_limit_ > 0 &&
+ // Make sure that the limit we hit is not total_bytes_limit_, since
+ // in that case we still need to call Refresh() so that it prints an
+ // error.
+ total_bytes_read_ - buffer_size_after_limit_ < total_bytes_limit_) {
+ // We hit a byte limit.
+ legitimate_message_end_ = true;
+ return false;
+ }
+
+ // Call refresh.
+ if (!Refresh()) {
+ // Refresh failed. Make sure that it failed due to EOF, not because
+ // we hit total_bytes_limit_, which, unlike normal limits, is not a
+ // valid place to end a message.
+ int current_position = total_bytes_read_ - buffer_size_after_limit_;
+ if (current_position >= total_bytes_limit_) {
+ // Hit total_bytes_limit_. But if we also hit the normal limit,
+ // we're still OK.
+ legitimate_message_end_ = current_limit_ == total_bytes_limit_;
+ } else {
+ legitimate_message_end_ = true;
+ }
+ return false;
+ }
+ }
+
+ // Slow path: Just do a 64-bit read.
+ uint64 result;
+ if (!ReadVarint64(&result)) return false;
+ *value = (uint32)result;
+ return true;
+ }
+}
+
+bool CodedInputStream::ReadVarint64(uint64* value) {
+ if (buffer_size_ >= kMaxVarintBytes ||
+ // Optimization: If the varint ends at exactly the end of the buffer,
+ // we can detect that and still use the fast path.
+ (buffer_size_ != 0 && !(buffer_[buffer_size_-1] & 0x80))) {
+ // Fast path: We have enough bytes left in the buffer to guarantee that
+ // this read won't cross the end, so we can skip the checks.
+
+ const uint8* ptr = buffer_;
+ uint32 b;
+
+ // Splitting into 32-bit pieces gives better performance on 32-bit
+ // processors.
+ uint32 part0 = 0, part1 = 0, part2 = 0;
+
+ b = *(ptr++); part0 = (b & 0x7F) ; if (!(b & 0x80)) goto done;
+ b = *(ptr++); part0 |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done;
+ b = *(ptr++); part0 |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done;
+ b = *(ptr++); part0 |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done;
+ b = *(ptr++); part1 = (b & 0x7F) ; if (!(b & 0x80)) goto done;
+ b = *(ptr++); part1 |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done;
+ b = *(ptr++); part1 |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done;
+ b = *(ptr++); part1 |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done;
+ b = *(ptr++); part2 = (b & 0x7F) ; if (!(b & 0x80)) goto done;
+ b = *(ptr++); part2 |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done;
+
+ // We have overrun the maximum size of a varint (10 bytes). The data
+ // must be corrupt.
+ return false;
+
+ done:
+ Advance(ptr - buffer_);
+ *value = (static_cast<uint64>(part0) ) |
+ (static_cast<uint64>(part1) << 28) |
+ (static_cast<uint64>(part2) << 56);
+ return true;
+
+ } else {
+ // Slow path: This read might cross the end of the buffer, so we
+ // need to check and refresh the buffer if and when it does.
+
+ uint64 result = 0;
+ int count = 0;
+ uint32 b;
+
+ do {
+ if (count == kMaxVarintBytes) return false;
+ while (buffer_size_ == 0) {
+ if (!Refresh()) return false;
+ }
+ b = *buffer_;
+ result |= static_cast<uint64>(b & 0x7F) << (7 * count);
+ Advance(1);
+ ++count;
+ } while(b & 0x80);
+
+ *value = result;
+ return true;
+ }
+}
+
+bool CodedInputStream::Refresh() {
+ if (buffer_size_after_limit_ > 0 || overflow_bytes_ > 0) {
+ // We've hit a limit. Stop.
+ buffer_ += buffer_size_;
+ buffer_size_ = 0;
+
+ int current_position = total_bytes_read_ - buffer_size_after_limit_;
+
+ if (current_position >= total_bytes_limit_ &&
+ total_bytes_limit_ != current_limit_) {
+ // Hit total_bytes_limit_.
+ PrintTotalBytesLimitError();
+ }
+
+ return false;
+ }
+
+ if (total_bytes_warning_threshold_ >= 0 &&
+ total_bytes_read_ >= total_bytes_warning_threshold_) {
+ GOOGLE_LOG(WARNING) << "Reading dangerously large protocol message. If the "
+ "message turns out to be larger than "
+ << total_bytes_limit_ << " bytes, parsing will be halted "
+ "for security reasons. To increase the limit (or to "
+ "disable these warnings), see "
+ "CodedInputStream::SetTotalBytesLimit() in "
+ "google/protobuf/io/coded_stream.h.";
+
+ // Don't warn again for this stream.
+ total_bytes_warning_threshold_ = -1;
+ }
+
+ const void* void_buffer;
+ if (input_->Next(&void_buffer, &buffer_size_)) {
+ buffer_ = reinterpret_cast<const uint8*>(void_buffer);
+ GOOGLE_CHECK_GE(buffer_size_, 0);
+
+ if (total_bytes_read_ <= INT_MAX - buffer_size_) {
+ total_bytes_read_ += buffer_size_;
+ } else {
+ // Overflow. Reset buffer_size_ to not include the bytes beyond INT_MAX.
+ // We can't get that far anyway, because total_bytes_limit_ is guaranteed
+ // to be less than it. We need to keep track of the number of bytes
+ // we discarded, though, so that we can call input_->BackUp() to back
+ // up over them on destruction.
+
+ // The following line is equivalent to:
+ // overflow_bytes_ = total_bytes_read_ + buffer_size_ - INT_MAX;
+ // except that it avoids overflows. Signed integer overflow has
+ // undefined results according to the C standard.
+ overflow_bytes_ = total_bytes_read_ - (INT_MAX - buffer_size_);
+ buffer_size_ -= overflow_bytes_;
+ total_bytes_read_ = INT_MAX;
+ }
+
+ RecomputeBufferLimits();
+ return true;
+ } else {
+ buffer_ = NULL;
+ buffer_size_ = 0;
+ return false;
+ }
+}
+
+// CodedOutputStream =================================================
+
+CodedOutputStream::CodedOutputStream(ZeroCopyOutputStream* output)
+ : output_(output),
+ buffer_(NULL),
+ buffer_size_(0),
+ total_bytes_(0) {
+}
+
+CodedOutputStream::~CodedOutputStream() {
+ if (buffer_size_ > 0) {
+ output_->BackUp(buffer_size_);
+ }
+}
+
+bool CodedOutputStream::WriteRaw(const void* data, int size) {
+ while (buffer_size_ < size) {
+ memcpy(buffer_, data, buffer_size_);
+ size -= buffer_size_;
+ data = reinterpret_cast<const uint8*>(data) + buffer_size_;
+ if (!Refresh()) return false;
+ }
+
+ memcpy(buffer_, data, size);
+ Advance(size);
+ return true;
+}
+
+
+bool CodedOutputStream::WriteLittleEndian32(uint32 value) {
+ uint8 bytes[sizeof(value)];
+
+ bool use_fast = buffer_size_ >= sizeof(value);
+ uint8* ptr = use_fast ? buffer_ : bytes;
+
+ ptr[0] = static_cast<uint8>(value );
+ ptr[1] = static_cast<uint8>(value >> 8);
+ ptr[2] = static_cast<uint8>(value >> 16);
+ ptr[3] = static_cast<uint8>(value >> 24);
+
+ if (use_fast) {
+ Advance(sizeof(value));
+ return true;
+ } else {
+ return WriteRaw(bytes, sizeof(value));
+ }
+}
+
+bool CodedOutputStream::WriteLittleEndian64(uint64 value) {
+ uint8 bytes[sizeof(value)];
+
+ uint32 part0 = static_cast<uint32>(value);
+ uint32 part1 = static_cast<uint32>(value >> 32);
+
+ bool use_fast = buffer_size_ >= sizeof(value);
+ uint8* ptr = use_fast ? buffer_ : bytes;
+
+ ptr[0] = static_cast<uint8>(part0 );
+ ptr[1] = static_cast<uint8>(part0 >> 8);
+ ptr[2] = static_cast<uint8>(part0 >> 16);
+ ptr[3] = static_cast<uint8>(part0 >> 24);
+ ptr[4] = static_cast<uint8>(part1 );
+ ptr[5] = static_cast<uint8>(part1 >> 8);
+ ptr[6] = static_cast<uint8>(part1 >> 16);
+ ptr[7] = static_cast<uint8>(part1 >> 24);
+
+ if (use_fast) {
+ Advance(sizeof(value));
+ return true;
+ } else {
+ return WriteRaw(bytes, sizeof(value));
+ }
+}
+
+bool CodedOutputStream::WriteVarint32Fallback(uint32 value) {
+ if (buffer_size_ >= kMaxVarint32Bytes) {
+ // Fast path: We have enough bytes left in the buffer to guarantee that
+ // this write won't cross the end, so we can skip the checks.
+ uint8* target = buffer_;
+
+ target[0] = static_cast<uint8>(value | 0x80);
+ if (value >= (1 << 7)) {
+ target[1] = static_cast<uint8>((value >> 7) | 0x80);
+ if (value >= (1 << 14)) {
+ target[2] = static_cast<uint8>((value >> 14) | 0x80);
+ if (value >= (1 << 21)) {
+ target[3] = static_cast<uint8>((value >> 21) | 0x80);
+ if (value >= (1 << 28)) {
+ target[4] = static_cast<uint8>(value >> 28);
+ Advance(5);
+ } else {
+ target[3] &= 0x7F;
+ Advance(4);
+ }
+ } else {
+ target[2] &= 0x7F;
+ Advance(3);
+ }
+ } else {
+ target[1] &= 0x7F;
+ Advance(2);
+ }
+ } else {
+ target[0] &= 0x7F;
+ Advance(1);
+ }
+
+ return true;
+ } else {
+ // Slow path: This write might cross the end of the buffer, so we
+ // compose the bytes first then use WriteRaw().
+ uint8 bytes[kMaxVarint32Bytes];
+ int size = 0;
+ while (value > 0x7F) {
+ bytes[size++] = (static_cast<uint8>(value) & 0x7F) | 0x80;
+ value >>= 7;
+ }
+ bytes[size++] = static_cast<uint8>(value) & 0x7F;
+ return WriteRaw(bytes, size);
+ }
+}
+
+bool CodedOutputStream::WriteVarint64(uint64 value) {
+ if (buffer_size_ >= kMaxVarintBytes) {
+ // Fast path: We have enough bytes left in the buffer to guarantee that
+ // this write won't cross the end, so we can skip the checks.
+ uint8* target = buffer_;
+
+ // Splitting into 32-bit pieces gives better performance on 32-bit
+ // processors.
+ uint32 part0 = static_cast<uint32>(value );
+ uint32 part1 = static_cast<uint32>(value >> 28);
+ uint32 part2 = static_cast<uint32>(value >> 56);
+
+ int size;
+
+ // Here we can't really optimize for small numbers, since the value is
+ // split into three parts. Cheking for numbers < 128, for instance,
+ // would require three comparisons, since you'd have to make sure part1
+ // and part2 are zero. However, if the caller is using 64-bit integers,
+ // it is likely that they expect the numbers to often be very large, so
+ // we probably don't want to optimize for small numbers anyway. Thus,
+ // we end up with a hardcoded binary search tree...
+ if (part2 == 0) {
+ if (part1 == 0) {
+ if (part0 < (1 << 14)) {
+ if (part0 < (1 << 7)) {
+ size = 1; goto size1;
+ } else {
+ size = 2; goto size2;
+ }
+ } else {
+ if (part0 < (1 << 21)) {
+ size = 3; goto size3;
+ } else {
+ size = 4; goto size4;
+ }
+ }
+ } else {
+ if (part1 < (1 << 14)) {
+ if (part1 < (1 << 7)) {
+ size = 5; goto size5;
+ } else {
+ size = 6; goto size6;
+ }
+ } else {
+ if (part1 < (1 << 21)) {
+ size = 7; goto size7;
+ } else {
+ size = 8; goto size8;
+ }
+ }
+ }
+ } else {
+ if (part2 < (1 << 7)) {
+ size = 9; goto size9;
+ } else {
+ size = 10; goto size10;
+ }
+ }
+
+ GOOGLE_LOG(FATAL) << "Can't get here.";
+
+ size10: target[9] = static_cast<uint8>((part2 >> 7) | 0x80);
+ size9 : target[8] = static_cast<uint8>((part2 ) | 0x80);
+ size8 : target[7] = static_cast<uint8>((part1 >> 21) | 0x80);
+ size7 : target[6] = static_cast<uint8>((part1 >> 14) | 0x80);
+ size6 : target[5] = static_cast<uint8>((part1 >> 7) | 0x80);
+ size5 : target[4] = static_cast<uint8>((part1 ) | 0x80);
+ size4 : target[3] = static_cast<uint8>((part0 >> 21) | 0x80);
+ size3 : target[2] = static_cast<uint8>((part0 >> 14) | 0x80);
+ size2 : target[1] = static_cast<uint8>((part0 >> 7) | 0x80);
+ size1 : target[0] = static_cast<uint8>((part0 ) | 0x80);
+
+ target[size-1] &= 0x7F;
+ Advance(size);
+ return true;
+ } else {
+ // Slow path: This write might cross the end of the buffer, so we
+ // compose the bytes first then use WriteRaw().
+ uint8 bytes[kMaxVarintBytes];
+ int size = 0;
+ while (value > 0x7F) {
+ bytes[size++] = (static_cast<uint8>(value) & 0x7F) | 0x80;
+ value >>= 7;
+ }
+ bytes[size++] = static_cast<uint8>(value) & 0x7F;
+ return WriteRaw(bytes, size);
+ }
+}
+
+bool CodedOutputStream::Refresh() {
+ void* void_buffer;
+ if (output_->Next(&void_buffer, &buffer_size_)) {
+ buffer_ = reinterpret_cast<uint8*>(void_buffer);
+ total_bytes_ += buffer_size_;
+ return true;
+ } else {
+ buffer_ = NULL;
+ buffer_size_ = 0;
+ return false;
+ }
+}
+
+int CodedOutputStream::VarintSize32Fallback(uint32 value) {
+ if (value < (1 << 7)) {
+ return 1;
+ } else if (value < (1 << 14)) {
+ return 2;
+ } else if (value < (1 << 21)) {
+ return 3;
+ } else if (value < (1 << 28)) {
+ return 4;
+ } else {
+ return 5;
+ }
+}
+
+int CodedOutputStream::VarintSize64(uint64 value) {
+ if (value < (1ull << 35)) {
+ if (value < (1ull << 7)) {
+ return 1;
+ } else if (value < (1ull << 14)) {
+ return 2;
+ } else if (value < (1ull << 21)) {
+ return 3;
+ } else if (value < (1ull << 28)) {
+ return 4;
+ } else {
+ return 5;
+ }
+ } else {
+ if (value < (1ull << 42)) {
+ return 6;
+ } else if (value < (1ull << 49)) {
+ return 7;
+ } else if (value < (1ull << 56)) {
+ return 8;
+ } else if (value < (1ull << 63)) {
+ return 9;
+ } else {
+ return 10;
+ }
+ }
+}
+
+} // namespace io
+} // namespace protobuf
+} // namespace google
diff --git a/src/google/protobuf/io/coded_stream.h b/src/google/protobuf/io/coded_stream.h
new file mode 100644
index 00000000..91e5c56a
--- /dev/null
+++ b/src/google/protobuf/io/coded_stream.h
@@ -0,0 +1,592 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.
+// http://code.google.com/p/protobuf/
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+//
+// This file contains the CodedInputStream and CodedOutputStream classes,
+// which wrap a ZeroCopyInputStream or ZeroCopyOutputStream, respectively,
+// and allow you to read or write individual pieces of data in various
+// formats. In particular, these implement the varint encoding for
+// integers, a simple variable-length encoding in which smaller numbers
+// take fewer bytes.
+//
+// Typically these classes will only be used internally by the protocol
+// buffer library in order to encode and decode protocol buffers. Clients
+// of the library only need to know about this class if they wish to write
+// custom message parsing or serialization procedures.
+//
+// CodedOutputStream example:
+// // Write some data to "myfile". First we write a 4-byte "magic number"
+// // to identify the file type, then write a length-delimited string. The
+// // string is composed of a varint giving the length followed by the raw
+// // bytes.
+// int fd = open("myfile", O_WRONLY);
+// ZeroCopyOutputStream* raw_output = new FileOutputStream(fd);
+// CodedOutputStream* coded_output = new CodedOutputStream(raw_output);
+//
+// int magic_number = 1234;
+// char text[] = "Hello world!";
+// coded_output->WriteLittleEndian32(magic_number);
+// coded_output->WriteVarint32(strlen(text));
+// coded_output->WriteRaw(text, strlen(text));
+//
+// delete coded_output;
+// delete raw_output;
+// close(fd);
+//
+// CodedInputStream example:
+// // Read a file created by the above code.
+// int fd = open("myfile", O_RDONLY);
+// ZeroCopyInputStream* raw_input = new FileInputStream(fd);
+// CodedInputStream coded_input = new CodedInputStream(raw_input);
+//
+// coded_input->ReadLittleEndian32(&magic_number);
+// if (magic_number != 1234) {
+// cerr << "File not in expected format." << endl;
+// return;
+// }
+//
+// uint32 size;
+// coded_input->ReadVarint32(&size);
+//
+// char* text = new char[size + 1];
+// coded_input->ReadRaw(buffer, size);
+// text[size] = '\0';
+//
+// delete coded_input;
+// delete raw_input;
+// close(fd);
+//
+// cout << "Text is: " << text << endl;
+// delete [] text;
+//
+// For those who are interested, varint encoding is defined as follows:
+//
+// The encoding operates on unsigned integers of up to 64 bits in length.
+// Each byte of the encoded value has the format:
+// * bits 0-6: Seven bits of the number being encoded.
+// * bit 7: Zero if this is the last byte in the encoding (in which
+// case all remaining bits of the number are zero) or 1 if
+// more bytes follow.
+// The first byte contains the least-significant 7 bits of the number, the
+// second byte (if present) contains the next-least-significant 7 bits,
+// and so on. So, the binary number 1011000101011 would be encoded in two
+// bytes as "10101011 00101100".
+//
+// In theory, varint could be used to encode integers of any length.
+// However, for practicality we set a limit at 64 bits. The maximum encoded
+// length of a number is thus 10 bytes.
+
+#ifndef GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
+#define GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
+
+#include <string>
+#include <google/protobuf/stubs/common.h>
+
+namespace google {
+
+namespace protobuf {
+namespace io {
+
+// Defined in this file.
+class CodedInputStream;
+class CodedOutputStream;
+
+// Defined in other files.
+class ZeroCopyInputStream; // zero_copy_stream.h
+class ZeroCopyOutputStream; // zero_copy_stream.h
+
+// Class which reads and decodes binary data which is composed of varint-
+// encoded integers and fixed-width pieces. Wraps a ZeroCopyInputStream.
+// Most users will not need to deal with CodedInputStream.
+//
+// Most methods of CodedInputStream that return a bool return false if an
+// underlying I/O error occurs or if the data is malformed. Once such a
+// failure occurs, the CodedInputStream is broken and is no longer useful.
+class LIBPROTOBUF_EXPORT CodedInputStream {
+ public:
+ // Create a CodedInputStream that reads from the given ZeroCopyInputStream.
+ explicit CodedInputStream(ZeroCopyInputStream* input);
+
+ // Destroy the CodedInputStream and position the underlying
+ // ZeroCopyInputStream at the first unread byte. If an error occurred while
+ // reading (causing a method to return false), then the exact position of
+ // the input stream may be anywhere between the last value that was read
+ // successfully and the stream's byte limit.
+ ~CodedInputStream();
+
+
+ // Skips a number of bytes. Returns false if an underlying read error
+ // occurs.
+ bool Skip(int count);
+
+ // Read raw bytes, copying them into the given buffer.
+ bool ReadRaw(void* buffer, int size);
+
+ // Like ReadRaw, but reads into a string.
+ //
+ // Implementation Note: ReadString() grows the string gradually as it
+ // reads in the data, rather than allocating the entire requested size
+ // upfront. This prevents denial-of-service attacks in which a client
+ // could claim that a string is going to be MAX_INT bytes long in order to
+ // crash the server because it can't allocate this much space at once.
+ bool ReadString(string* buffer, int size);
+
+
+ // Read a 32-bit little-endian integer.
+ bool ReadLittleEndian32(uint32* value);
+ // Read a 64-bit little-endian integer.
+ bool ReadLittleEndian64(uint64* value);
+
+ // Read an unsigned integer with Varint encoding, truncating to 32 bits.
+ // Reading a 32-bit value is equivalent to reading a 64-bit one and casting
+ // it to uint32, but may be more efficient.
+ bool ReadVarint32(uint32* value);
+ // Read an unsigned integer with Varint encoding.
+ bool ReadVarint64(uint64* value);
+
+ // Read a tag. This calls ReadVarint32() and returns the result, or returns
+ // zero (which is not a valid tag) if ReadVarint32() fails. Also, it updates
+ // the last tag value, which can be checked with LastTagWas().
+ // Always inline because this is only called in once place per parse loop
+ // but it is called for every iteration of said loop, so it should be fast.
+ // GCC doesn't want to inline this by default.
+ uint32 ReadTag() GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
+
+ // Usually returns true if calling ReadVarint32() now would produce the given
+ // value. Will always return false if ReadVarint32() would not return the
+ // given value. If ExpectTag() returns true, it also advances past
+ // the varint. For best performance, use a compile-time constant as the
+ // parameter.
+ // Always inline because this collapses to a small number of instructions
+ // when given a constant parameter, but GCC doesn't want to inline by default.
+ bool ExpectTag(uint32 expected) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
+
+ // Usually returns true if no more bytes can be read. Always returns false
+ // if more bytes can be read. If ExpectAtEnd() returns true, a subsequent
+ // call to LastTagWas() will act as if ReadTag() had been called and returned
+ // zero, and ConsumedEntireMessage() will return true.
+ bool ExpectAtEnd();
+
+ // If the last call to ReadTag() returned the given value, returns true.
+ // Otherwise, returns false;
+ //
+ // This is needed because parsers for some types of embedded messages
+ // (with field type TYPE_GROUP) don't actually know that they've reached the
+ // end of a message until they see an ENDGROUP tag, which was actually part
+ // of the enclosing message. The enclosing message would like to check that
+ // tag to make sure it had the right number, so it calls LastTagWas() on
+ // return from the embedded parser to check.
+ bool LastTagWas(uint32 expected);
+
+ // When parsing message (but NOT a group), this method must be called
+ // immediately after MergeFromCodedStream() returns (if it returns true)
+ // to further verify that the message ended in a legitimate way. For
+ // example, this verifies that parsing did not end on an end-group tag.
+ // It also checks for some cases where, due to optimizations,
+ // MergeFromCodedStream() can incorrectly return true.
+ bool ConsumedEntireMessage();
+
+ // Limits ----------------------------------------------------------
+ // Limits are used when parsing length-delimited embedded messages.
+ // After the message's length is read, PushLimit() is used to prevent
+ // the CodedInputStream from reading beyond that length. Once the
+ // embedded message has been parsed, PopLimit() is called to undo the
+ // limit.
+
+ // Opaque type used with PushLimit() and PopLimit(). Do not modify
+ // values of this type yourself. The only reason that this isn't a
+ // struct with private internals is for efficiency.
+ typedef int Limit;
+
+ // Places a limit on the number of bytes that the stream may read,
+ // starting from the current position. Once the stream hits this limit,
+ // it will act like the end of the input has been reached until PopLimit()
+ // is called.
+ //
+ // As the names imply, the stream conceptually has a stack of limits. The
+ // shortest limit on the stack is always enforced, even if it is not the
+ // top limit.
+ //
+ // The value returned by PushLimit() is opaque to the caller, and must
+ // be passed unchanged to the corresponding call to PopLimit().
+ Limit PushLimit(int byte_limit);
+
+ // Pops the last limit pushed by PushLimit(). The input must be the value
+ // returned by that call to PushLimit().
+ void PopLimit(Limit limit);
+
+ // Returns the number of bytes left until the nearest limit on the
+ // stack is hit, or -1 if no limits are in place.
+ int BytesUntilLimit();
+
+ // Total Bytes Limit -----------------------------------------------
+ // To prevent malicious users from sending excessively large messages
+ // and causing integer overflows or memory exhaustion, CodedInputStream
+ // imposes a hard limit on the total number of bytes it will read.
+
+ // Sets the maximum number of bytes that this CodedInputStream will read
+ // before refusing to continue. To prevent integer overflows in the
+ // protocol buffers implementation, as well as to prevent servers from
+ // allocating enormous amounts of memory to hold parsed messages, the
+ // maximum message length should be limited to the shortest length that
+ // will not harm usability. The theoretical shortest message that could
+ // cause integer overflows is 512MB. The default limit is 64MB. Apps
+ // should set shorter limits if possible. If warning_threshold is not -1,
+ // a warning will be printed to stderr after warning_threshold bytes are
+ // read. An error will always be printed to stderr if the limit is
+ // reached.
+ //
+ // This is unrelated to PushLimit()/PopLimit().
+ //
+ // Hint: If you are reading this because your program is printing a
+ // warning about dangerously large protocol messages, you may be
+ // confused about what to do next. The best option is to change your
+ // design such that excessively large messages are not necessary.
+ // For example, try to design file formats to consist of many small
+ // messages rather than a single large one. If this is infeasible,
+ // you will need to increase the limit. Chances are, though, that
+ // your code never constructs a CodedInputStream on which the limit
+ // can be set. You probably parse messages by calling things like
+ // Message::ParseFromString(). In this case, you will need to change
+ // your code to instead construct some sort of ZeroCopyInputStream
+ // (e.g. an ArrayInputStream), construct a CodedInputStream around
+ // that, then call Message::ParseFromCodedStream() instead. Then
+ // you can adjust the limit. Yes, it's more work, but you're doing
+ // something unusual.
+ void SetTotalBytesLimit(int total_bytes_limit, int warning_threshold);
+
+ // Recursion Limit -------------------------------------------------
+ // To prevent corrupt or malicious messages from causing stack overflows,
+ // we must keep track of the depth of recursion when parsing embedded
+ // messages and groups. CodedInputStream keeps track of this because it
+ // is the only object that is passed down the stack during parsing.
+
+ // Sets the maximum recursion depth. The default is 64.
+ void SetRecursionLimit(int limit);
+
+ // Increments the current recursion depth. Returns true if the depth is
+ // under the limit, false if it has gone over.
+ bool IncrementRecursionDepth();
+
+ // Decrements the recursion depth.
+ void DecrementRecursionDepth();
+
+ private:
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedInputStream);
+
+ ZeroCopyInputStream* input_;
+ const uint8* buffer_;
+ int buffer_size_; // size of current buffer
+ int total_bytes_read_; // total bytes read from input_, including
+ // the current buffer
+
+ // If total_bytes_read_ surpasses INT_MAX, we record the extra bytes here
+ // so that we can BackUp() on destruction.
+ int overflow_bytes_;
+
+ // LastTagWas() stuff.
+ uint32 last_tag_; // result of last ReadTag().
+
+ // This is set true by ReadVarint32Fallback() if it is called when exactly
+ // at EOF, or by ExpectAtEnd() when it returns true. This happens when we
+ // reach the end of a message and attempt to read another tag.
+ bool legitimate_message_end_;
+
+ // See EnableAliasing().
+ bool aliasing_enabled_;
+
+ // Limits
+ Limit current_limit_; // if position = -1, no limit is applied
+
+ // For simplicity, if the current buffer crosses a limit (either a normal
+ // limit created by PushLimit() or the total bytes limit), buffer_size_
+ // only tracks the number of bytes before that limit. This field
+ // contains the number of bytes after it. Note that this implies that if
+ // buffer_size_ == 0 and buffer_size_after_limit_ > 0, we know we've
+ // hit a limit. However, if both are zero, it doesn't necessarily mean
+ // we aren't at a limit -- the buffer may have ended exactly at the limit.
+ int buffer_size_after_limit_;
+
+ // Maximum number of bytes to read, period. This is unrelated to
+ // current_limit_. Set using SetTotalBytesLimit().
+ int total_bytes_limit_;
+ int total_bytes_warning_threshold_;
+
+ // Current recursion depth, controlled by IncrementRecursionDepth() and
+ // DecrementRecursionDepth().
+ int recursion_depth_;
+ // Recursion depth limit, set by SetRecursionLimit().
+ int recursion_limit_;
+
+ // Advance the buffer by a given number of bytes.
+ void Advance(int amount);
+
+ // Recomputes the value of buffer_size_after_limit_. Must be called after
+ // current_limit_ or total_bytes_limit_ changes.
+ void RecomputeBufferLimits();
+
+ // Writes an error message saying that we hit total_bytes_limit_.
+ void PrintTotalBytesLimitError();
+
+ // Called when the buffer runs out to request more data. Implies an
+ // Advance(buffer_size_).
+ bool Refresh();
+
+ bool ReadVarint32Fallback(uint32* value);
+};
+
+// Class which encodes and writes binary data which is composed of varint-
+// encoded integers and fixed-width pieces. Wraps a ZeroCopyOutputStream.
+// Most users will not need to deal with CodedOutputStream.
+//
+// Most methods of CodedOutputStream which return a bool return false if an
+// underlying I/O error occurs. Once such a failure occurs, the
+// CodedOutputStream is broken and is no longer useful.
+class LIBPROTOBUF_EXPORT CodedOutputStream {
+ public:
+ // Create an CodedOutputStream that writes to the given ZeroCopyOutputStream.
+ explicit CodedOutputStream(ZeroCopyOutputStream* output);
+
+ // Destroy the CodedOutputStream and position the underlying
+ // ZeroCopyOutputStream immediately after the last byte written.
+ ~CodedOutputStream();
+
+ // Write raw bytes, copying them from the given buffer.
+ bool WriteRaw(const void* buffer, int size);
+
+ // Equivalent to WriteRaw(str.data(), str.size()).
+ bool WriteString(const string& str);
+
+
+ // Write a 32-bit little-endian integer.
+ bool WriteLittleEndian32(uint32 value);
+ // Write a 64-bit little-endian integer.
+ bool WriteLittleEndian64(uint64 value);
+
+ // Write an unsigned integer with Varint encoding. Writing a 32-bit value
+ // is equivalent to casting it to uint64 and writing it as a 64-bit value,
+ // but may be more efficient.
+ bool WriteVarint32(uint32 value);
+ // Write an unsigned integer with Varint encoding.
+ bool WriteVarint64(uint64 value);
+
+ // Equivalent to WriteVarint32() except when the value is negative,
+ // in which case it must be sign-extended to a full 10 bytes.
+ bool WriteVarint32SignExtended(int32 value);
+
+ // This is identical to WriteVarint32(), but optimized for writing tags.
+ // In particular, if the input is a compile-time constant, this method
+ // compiles down to a couple instructions.
+ // Always inline because otherwise the aformentioned optimization can't work,
+ // but GCC by default doesn't want to inline this.
+ bool WriteTag(uint32 value) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;
+
+ // Returns the number of bytes needed to encode the given value as a varint.
+ static int VarintSize32(uint32 value);
+ // Returns the number of bytes needed to encode the given value as a varint.
+ static int VarintSize64(uint64 value);
+
+ // If negative, 10 bytes. Otheriwse, same as VarintSize32().
+ static int VarintSize32SignExtended(int32 value);
+
+ // Returns the total number of bytes written since this object was created.
+ inline int ByteCount() const;
+
+ private:
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedOutputStream);
+
+ ZeroCopyOutputStream* output_;
+ uint8* buffer_;
+ int buffer_size_;
+ int total_bytes_; // Sum of sizes of all buffers seen so far.
+
+ // Advance the buffer by a given number of bytes.
+ void Advance(int amount);
+
+ // Called when the buffer runs out to request more data. Implies an
+ // Advance(buffer_size_).
+ bool Refresh();
+
+ bool WriteVarint32Fallback(uint32 value);
+ static int VarintSize32Fallback(uint32 value);
+};
+
+// inline methods ====================================================
+// The vast majority of varints are only one byte. These inline
+// methods optimize for that case.
+
+inline bool CodedInputStream::ReadVarint32(uint32* value) {
+ if (buffer_size_ != 0 && *buffer_ < 0x80) {
+ *value = *buffer_;
+ Advance(1);
+ return true;
+ } else {
+ return ReadVarint32Fallback(value);
+ }
+}
+
+inline uint32 CodedInputStream::ReadTag() {
+ if (buffer_size_ != 0 && buffer_[0] < 0x80) {
+ last_tag_ = buffer_[0];
+ Advance(1);
+ return last_tag_;
+ } else if (buffer_size_ >= 2 && buffer_[1] < 0x80) {
+ last_tag_ = (buffer_[0] & 0x7f) + (buffer_[1] << 7);
+ Advance(2);
+ return last_tag_;
+ } else if (ReadVarint32Fallback(&last_tag_)) {
+ return last_tag_;
+ } else {
+ last_tag_ = 0;
+ return 0;
+ }
+}
+
+inline bool CodedInputStream::LastTagWas(uint32 expected) {
+ return last_tag_ == expected;
+}
+
+inline bool CodedInputStream::ConsumedEntireMessage() {
+ return legitimate_message_end_;
+}
+
+inline bool CodedInputStream::ExpectTag(uint32 expected) {
+ if (expected < (1 << 7)) {
+ if (buffer_size_ != 0 && buffer_[0] == expected) {
+ Advance(1);
+ return true;
+ } else {
+ return false;
+ }
+ } else if (expected < (1 << 14)) {
+ if (buffer_size_ >= 2 &&
+ buffer_[0] == static_cast<uint8>(expected | 0x80) &&
+ buffer_[1] == static_cast<uint8>(expected >> 7)) {
+ Advance(2);
+ return true;
+ } else {
+ return false;
+ }
+ } else {
+ // Don't bother optimizing for larger values.
+ return false;
+ }
+}
+
+inline bool CodedInputStream::ExpectAtEnd() {
+ // If we are at a limit we know no more bytes can be read. Otherwise, it's
+ // hard to say without calling Refresh(), and we'd rather not do that.
+
+ if (buffer_size_ == 0 && buffer_size_after_limit_ != 0) {
+ last_tag_ = 0; // Pretend we called ReadTag()...
+ legitimate_message_end_ = true; // ... and it hit EOF.
+ return true;
+ } else {
+ return false;
+ }
+}
+
+inline bool CodedOutputStream::WriteVarint32(uint32 value) {
+ if (value < 0x80 && buffer_size_ > 0) {
+ *buffer_ = value;
+ Advance(1);
+ return true;
+ } else {
+ return WriteVarint32Fallback(value);
+ }
+}
+
+inline bool CodedOutputStream::WriteVarint32SignExtended(int32 value) {
+ if (value < 0) {
+ return WriteVarint64(static_cast<uint64>(value));
+ } else {
+ return WriteVarint32(static_cast<uint32>(value));
+ }
+}
+
+inline bool CodedOutputStream::WriteTag(uint32 value) {
+ if (value < (1 << 7)) {
+ if (buffer_size_ != 0) {
+ buffer_[0] = value;
+ Advance(1);
+ return true;
+ }
+ } else if (value < (1 << 14)) {
+ if (buffer_size_ >= 2) {
+ buffer_[0] = static_cast<uint8>(value | 0x80);
+ buffer_[1] = static_cast<uint8>(value >> 7);
+ Advance(2);
+ return true;
+ }
+ }
+ return WriteVarint32Fallback(value);
+}
+
+inline int CodedOutputStream::VarintSize32(uint32 value) {
+ if (value < (1 << 7)) {
+ return 1;
+ } else {
+ return VarintSize32Fallback(value);
+ }
+}
+
+inline int CodedOutputStream::VarintSize32SignExtended(int32 value) {
+ if (value < 0) {
+ return 10; // TODO(kenton): Make this a symbolic constant.
+ } else {
+ return VarintSize32(static_cast<uint32>(value));
+ }
+}
+
+inline bool CodedOutputStream::WriteString(const string& str) {
+ return WriteRaw(str.data(), str.size());
+}
+
+inline int CodedOutputStream::ByteCount() const {
+ return total_bytes_ - buffer_size_;
+}
+
+inline void CodedInputStream::Advance(int amount) {
+ buffer_ += amount;
+ buffer_size_ -= amount;
+}
+
+inline void CodedOutputStream::Advance(int amount) {
+ buffer_ += amount;
+ buffer_size_ -= amount;
+}
+
+inline void CodedInputStream::SetRecursionLimit(int limit) {
+ recursion_limit_ = limit;
+}
+
+inline bool CodedInputStream::IncrementRecursionDepth() {
+ ++recursion_depth_;
+ return recursion_depth_ <= recursion_limit_;
+}
+
+inline void CodedInputStream::DecrementRecursionDepth() {
+ if (recursion_depth_ > 0) --recursion_depth_;
+}
+
+} // namespace io
+} // namespace protobuf
+
+} // namespace google
+#endif // GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
diff --git a/src/google/protobuf/io/coded_stream_unittest.cc b/src/google/protobuf/io/coded_stream_unittest.cc
new file mode 100644
index 00000000..c1a88349
--- /dev/null
+++ b/src/google/protobuf/io/coded_stream_unittest.cc
@@ -0,0 +1,929 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.
+// http://code.google.com/p/protobuf/
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+//
+// This file contains tests and benchmarks.
+
+#include <vector>
+
+#include <google/protobuf/io/coded_stream.h>
+
+#include <limits.h>
+
+#include <google/protobuf/stubs/common.h>
+#include <google/protobuf/testing/googletest.h>
+#include <gtest/gtest.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/stubs/strutil.h>
+
+
+// This declares an unsigned long long integer literal in a portable way.
+// (The original macro is way too big and ruins my formatting.)
+#undef ULL
+#define ULL(x) GOOGLE_ULONGLONG(x)
+
+namespace google {
+namespace protobuf {
+namespace io {
+namespace {
+
+// ===================================================================
+// Data-Driven Test Infrastructure
+
+// TEST_1D and TEST_2D are macros I'd eventually like to see added to
+// gTest. These macros can be used to declare tests which should be
+// run multiple times, once for each item in some input array. TEST_1D
+// tests all cases in a single input array. TEST_2D tests all
+// combinations of cases from two arrays. The arrays must be statically
+// defined such that the GOOGLE_ARRAYSIZE() macro works on them. Example:
+//
+// int kCases[] = {1, 2, 3, 4}
+// TEST_1D(MyFixture, MyTest, kCases) {
+// EXPECT_GT(kCases_case, 0);
+// }
+//
+// This test iterates through the numbers 1, 2, 3, and 4 and tests that
+// they are all grater than zero. In case of failure, the exact case
+// which failed will be printed. The case type must be printable using
+// ostream::operator<<.
+
+#define TEST_1D(FIXTURE, NAME, CASES) \
+ class FIXTURE##_##NAME##_DD : public FIXTURE { \
+ protected: \
+ template <typename CaseType> \
+ void DoSingleCase(const CaseType& CASES##_case); \
+ }; \
+ \
+ TEST_F(FIXTURE##_##NAME##_DD, NAME) { \
+ for (int i = 0; i < GOOGLE_ARRAYSIZE(CASES); i++) { \
+ SCOPED_TRACE(testing::Message() \
+ << #CASES " case #" << i << ": " << CASES[i]); \
+ DoSingleCase(CASES[i]); \
+ } \
+ } \
+ \
+ template <typename CaseType> \
+ void FIXTURE##_##NAME##_DD::DoSingleCase(const CaseType& CASES##_case)
+
+#define TEST_2D(FIXTURE, NAME, CASES1, CASES2) \
+ class FIXTURE##_##NAME##_DD : public FIXTURE { \
+ protected: \
+ template <typename CaseType1, typename CaseType2> \
+ void DoSingleCase(const CaseType1& CASES1##_case, \
+ const CaseType2& CASES2##_case); \
+ }; \
+ \
+ TEST_F(FIXTURE##_##NAME##_DD, NAME) { \
+ for (int i = 0; i < GOOGLE_ARRAYSIZE(CASES1); i++) { \
+ for (int j = 0; j < GOOGLE_ARRAYSIZE(CASES2); j++) { \
+ SCOPED_TRACE(testing::Message() \
+ << #CASES1 " case #" << i << ": " << CASES1[i] << ", " \
+ << #CASES2 " case #" << j << ": " << CASES2[j]); \
+ DoSingleCase(CASES1[i], CASES2[j]); \
+ } \
+ } \
+ } \
+ \
+ template <typename CaseType1, typename CaseType2> \
+ void FIXTURE##_##NAME##_DD::DoSingleCase(const CaseType1& CASES1##_case, \
+ const CaseType2& CASES2##_case)
+
+// ===================================================================
+
+class CodedStreamTest : public testing::Test {
+ protected:
+ static const int kBufferSize = 1024 * 64;
+ static uint8 buffer_[kBufferSize];
+};
+
+uint8 CodedStreamTest::buffer_[CodedStreamTest::kBufferSize];
+
+// We test each operation over a variety of block sizes to insure that
+// we test cases where reads or writes cross buffer boundaries, cases
+// where they don't, and cases where there is so much buffer left that
+// we can use special optimized paths that don't worry about bounds
+// checks.
+const int kBlockSizes[] = {1, 2, 3, 5, 7, 13, 32, 1024};
+
+// -------------------------------------------------------------------
+// Varint tests.
+
+struct VarintCase {
+ uint8 bytes[10]; // Encoded bytes.
+ int size; // Encoded size, in bytes.
+ uint64 value; // Parsed value.
+};
+
+inline std::ostream& operator<<(std::ostream& os, const VarintCase& c) {
+ return os << c.value;
+}
+
+VarintCase kVarintCases[] = {
+ // 32-bit values
+ {{0x00} , 1, 0},
+ {{0x01} , 1, 1},
+ {{0x7f} , 1, 127},
+ {{0xa2, 0x74}, 2, (0x22 << 0) | (0x74 << 7)}, // 14882
+ {{0xbe, 0xf7, 0x92, 0x84, 0x0b}, 5, // 2961488830
+ (0x3e << 0) | (0x77 << 7) | (0x12 << 14) | (0x04 << 21) |
+ (ULL(0x0b) << 28)},
+
+ // 64-bit
+ {{0xbe, 0xf7, 0x92, 0x84, 0x1b}, 5, // 7256456126
+ (0x3e << 0) | (0x77 << 7) | (0x12 << 14) | (0x04 << 21) |
+ (ULL(0x1b) << 28)},
+ {{0x80, 0xe6, 0xeb, 0x9c, 0xc3, 0xc9, 0xa4, 0x49}, 8, // 41256202580718336
+ (0x00 << 0) | (0x66 << 7) | (0x6b << 14) | (0x1c << 21) |
+ (ULL(0x43) << 28) | (ULL(0x49) << 35) | (ULL(0x24) << 42) |
+ (ULL(0x49) << 49)},
+ // 11964378330978735131
+ {{0x9b, 0xa8, 0xf9, 0xc2, 0xbb, 0xd6, 0x80, 0x85, 0xa6, 0x01}, 10,
+ (0x1b << 0) | (0x28 << 7) | (0x79 << 14) | (0x42 << 21) |
+ (ULL(0x3b) << 28) | (ULL(0x56) << 35) | (ULL(0x00) << 42) |
+ (ULL(0x05) << 49) | (ULL(0x26) << 56) | (ULL(0x01) << 63)},
+};
+
+TEST_2D(CodedStreamTest, ReadVarint32, kVarintCases, kBlockSizes) {
+ memcpy(buffer_, kVarintCases_case.bytes, kVarintCases_case.size);
+ ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedInputStream coded_input(&input);
+
+ uint32 value;
+ EXPECT_TRUE(coded_input.ReadVarint32(&value));
+ EXPECT_EQ(static_cast<uint32>(kVarintCases_case.value), value);
+ }
+
+ EXPECT_EQ(kVarintCases_case.size, input.ByteCount());
+}
+
+TEST_2D(CodedStreamTest, ReadTag, kVarintCases, kBlockSizes) {
+ memcpy(buffer_, kVarintCases_case.bytes, kVarintCases_case.size);
+ ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedInputStream coded_input(&input);
+
+ uint32 expected_value = static_cast<uint32>(kVarintCases_case.value);
+ EXPECT_EQ(expected_value, coded_input.ReadTag());
+
+ EXPECT_TRUE(coded_input.LastTagWas(expected_value));
+ EXPECT_FALSE(coded_input.LastTagWas(expected_value + 1));
+ }
+
+ EXPECT_EQ(kVarintCases_case.size, input.ByteCount());
+}
+
+TEST_1D(CodedStreamTest, ExpectTag, kVarintCases) {
+ // Leave one byte at the beginning of the buffer so we can read it
+ // to force the first buffer to be loaded.
+ buffer_[0] = '\0';
+ memcpy(buffer_ + 1, kVarintCases_case.bytes, kVarintCases_case.size);
+ ArrayInputStream input(buffer_, sizeof(buffer_));
+
+ {
+ CodedInputStream coded_input(&input);
+
+ // Read one byte to force coded_input.Refill() to be called. Otherwise,
+ // ExpectTag() will return a false negative.
+ uint8 dummy;
+ coded_input.ReadRaw(&dummy, 1);
+ EXPECT_EQ((uint)'\0', (uint)dummy);
+
+ uint32 expected_value = static_cast<uint32>(kVarintCases_case.value);
+
+ // ExpectTag() produces false negatives for large values.
+ if (kVarintCases_case.size <= 2) {
+ EXPECT_FALSE(coded_input.ExpectTag(expected_value + 1));
+ EXPECT_TRUE(coded_input.ExpectTag(expected_value));
+ } else {
+ EXPECT_FALSE(coded_input.ExpectTag(expected_value));
+ }
+ }
+
+ if (kVarintCases_case.size <= 2) {
+ EXPECT_EQ(kVarintCases_case.size + 1, input.ByteCount());
+ } else {
+ EXPECT_EQ(1, input.ByteCount());
+ }
+}
+
+TEST_2D(CodedStreamTest, ReadVarint64, kVarintCases, kBlockSizes) {
+ memcpy(buffer_, kVarintCases_case.bytes, kVarintCases_case.size);
+ ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedInputStream coded_input(&input);
+
+ uint64 value;
+ EXPECT_TRUE(coded_input.ReadVarint64(&value));
+ EXPECT_EQ(kVarintCases_case.value, value);
+ }
+
+ EXPECT_EQ(kVarintCases_case.size, input.ByteCount());
+}
+
+TEST_2D(CodedStreamTest, WriteVarint32, kVarintCases, kBlockSizes) {
+ if (kVarintCases_case.value > ULL(0x00000000FFFFFFFF)) {
+ // Skip this test for the 64-bit values.
+ return;
+ }
+
+ ArrayOutputStream output(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedOutputStream coded_output(&output);
+
+ EXPECT_TRUE(coded_output.WriteVarint32(
+ static_cast<uint32>(kVarintCases_case.value)));
+
+ EXPECT_EQ(kVarintCases_case.size, coded_output.ByteCount());
+ }
+
+ EXPECT_EQ(kVarintCases_case.size, output.ByteCount());
+ EXPECT_EQ(0,
+ memcmp(buffer_, kVarintCases_case.bytes, kVarintCases_case.size));
+}
+
+TEST_2D(CodedStreamTest, WriteVarint64, kVarintCases, kBlockSizes) {
+ ArrayOutputStream output(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedOutputStream coded_output(&output);
+
+ EXPECT_TRUE(coded_output.WriteVarint64(kVarintCases_case.value));
+
+ EXPECT_EQ(kVarintCases_case.size, coded_output.ByteCount());
+ }
+
+ EXPECT_EQ(kVarintCases_case.size, output.ByteCount());
+ EXPECT_EQ(0,
+ memcmp(buffer_, kVarintCases_case.bytes, kVarintCases_case.size));
+}
+
+// This test causes gcc 3.3.5 (and earlier?) to give the cryptic error:
+// "sorry, unimplemented: `method_call_expr' not supported by dump_expr"
+#if !defined(__GNUC__) || __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 3)
+
+int32 kSignExtendedVarintCases[] = {
+ 0, 1, -1, 1237894, -37895138
+};
+
+TEST_2D(CodedStreamTest, WriteVarint32SignExtended,
+ kSignExtendedVarintCases, kBlockSizes) {
+ ArrayOutputStream output(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedOutputStream coded_output(&output);
+
+ EXPECT_TRUE(coded_output.WriteVarint32SignExtended(
+ kSignExtendedVarintCases_case));
+
+ if (kSignExtendedVarintCases_case < 0) {
+ EXPECT_EQ(10, coded_output.ByteCount());
+ } else {
+ EXPECT_LE(coded_output.ByteCount(), 5);
+ }
+ }
+
+ if (kSignExtendedVarintCases_case < 0) {
+ EXPECT_EQ(10, output.ByteCount());
+ } else {
+ EXPECT_LE(output.ByteCount(), 5);
+ }
+
+ // Read value back in as a varint64 and insure it matches.
+ ArrayInputStream input(buffer_, sizeof(buffer_));
+
+ {
+ CodedInputStream coded_input(&input);
+
+ uint64 value;
+ EXPECT_TRUE(coded_input.ReadVarint64(&value));
+
+ EXPECT_EQ(kSignExtendedVarintCases_case, static_cast<int64>(value));
+ }
+
+ EXPECT_EQ(output.ByteCount(), input.ByteCount());
+}
+
+#endif
+
+
+// -------------------------------------------------------------------
+// Varint failure test.
+
+struct VarintErrorCase {
+ uint8 bytes[12];
+ int size;
+ bool can_parse;
+};
+
+inline std::ostream& operator<<(std::ostream& os, const VarintErrorCase& c) {
+ return os << "size " << c.size;
+}
+
+const VarintErrorCase kVarintErrorCases[] = {
+ // Control case. (Insures that there isn't something else wrong that
+ // makes parsing always fail.)
+ {{0x00}, 1, true},
+
+ // No input data.
+ {{}, 0, false},
+
+ // Input ends unexpectedly.
+ {{0xf0, 0xab}, 2, false},
+
+ // Input ends unexpectedly after 32 bits.
+ {{0xf0, 0xab, 0xc9, 0x9a, 0xf8, 0xb2}, 6, false},
+
+ // Longer than 10 bytes.
+ {{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01},
+ 11, false},
+};
+
+TEST_2D(CodedStreamTest, ReadVarint32Error, kVarintErrorCases, kBlockSizes) {
+ memcpy(buffer_, kVarintErrorCases_case.bytes, kVarintErrorCases_case.size);
+ ArrayInputStream input(buffer_, kVarintErrorCases_case.size,
+ kBlockSizes_case);
+ CodedInputStream coded_input(&input);
+
+ uint32 value;
+ EXPECT_EQ(kVarintErrorCases_case.can_parse, coded_input.ReadVarint32(&value));
+}
+
+TEST_2D(CodedStreamTest, ReadVarint64Error, kVarintErrorCases, kBlockSizes) {
+ memcpy(buffer_, kVarintErrorCases_case.bytes, kVarintErrorCases_case.size);
+ ArrayInputStream input(buffer_, kVarintErrorCases_case.size,
+ kBlockSizes_case);
+ CodedInputStream coded_input(&input);
+
+ uint64 value;
+ EXPECT_EQ(kVarintErrorCases_case.can_parse, coded_input.ReadVarint64(&value));
+}
+
+// -------------------------------------------------------------------
+// VarintSize
+
+struct VarintSizeCase {
+ uint64 value;
+ int size;
+};
+
+inline std::ostream& operator<<(std::ostream& os, const VarintSizeCase& c) {
+ return os << c.value;
+}
+
+VarintSizeCase kVarintSizeCases[] = {
+ {0u, 1},
+ {1u, 1},
+ {127u, 1},
+ {128u, 2},
+ {758923u, 3},
+ {4000000000u, 5},
+ {ULL(41256202580718336), 8},
+ {ULL(11964378330978735131), 10},
+};
+
+TEST_1D(CodedStreamTest, VarintSize32, kVarintSizeCases) {
+ if (kVarintSizeCases_case.value > 0xffffffffu) {
+ // Skip 64-bit values.
+ return;
+ }
+
+ EXPECT_EQ(kVarintSizeCases_case.size,
+ CodedOutputStream::VarintSize32(
+ static_cast<uint32>(kVarintSizeCases_case.value)));
+}
+
+TEST_1D(CodedStreamTest, VarintSize64, kVarintSizeCases) {
+ EXPECT_EQ(kVarintSizeCases_case.size,
+ CodedOutputStream::VarintSize64(kVarintSizeCases_case.value));
+}
+
+// -------------------------------------------------------------------
+// Fixed-size int tests
+
+struct Fixed32Case {
+ uint8 bytes[sizeof(uint32)]; // Encoded bytes.
+ uint32 value; // Parsed value.
+};
+
+struct Fixed64Case {
+ uint8 bytes[sizeof(uint64)]; // Encoded bytes.
+ uint64 value; // Parsed value.
+};
+
+inline std::ostream& operator<<(std::ostream& os, const Fixed32Case& c) {
+ return os << "0x" << hex << c.value << dec;
+}
+
+inline std::ostream& operator<<(std::ostream& os, const Fixed64Case& c) {
+ return os << "0x" << hex << c.value << dec;
+}
+
+Fixed32Case kFixed32Cases[] = {
+ {{0xef, 0xcd, 0xab, 0x90}, 0x90abcdefu},
+ {{0x12, 0x34, 0x56, 0x78}, 0x78563412u},
+};
+
+Fixed64Case kFixed64Cases[] = {
+ {{0xef, 0xcd, 0xab, 0x90, 0x12, 0x34, 0x56, 0x78}, ULL(0x7856341290abcdef)},
+ {{0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88}, ULL(0x8877665544332211)},
+};
+
+TEST_2D(CodedStreamTest, ReadLittleEndian32, kFixed32Cases, kBlockSizes) {
+ memcpy(buffer_, kFixed32Cases_case.bytes, sizeof(kFixed32Cases_case.bytes));
+ ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedInputStream coded_input(&input);
+
+ uint32 value;
+ EXPECT_TRUE(coded_input.ReadLittleEndian32(&value));
+ EXPECT_EQ(kFixed32Cases_case.value, value);
+ }
+
+ EXPECT_EQ(sizeof(uint32), input.ByteCount());
+}
+
+TEST_2D(CodedStreamTest, ReadLittleEndian64, kFixed64Cases, kBlockSizes) {
+ memcpy(buffer_, kFixed64Cases_case.bytes, sizeof(kFixed64Cases_case.bytes));
+ ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedInputStream coded_input(&input);
+
+ uint64 value;
+ EXPECT_TRUE(coded_input.ReadLittleEndian64(&value));
+ EXPECT_EQ(kFixed64Cases_case.value, value);
+ }
+
+ EXPECT_EQ(sizeof(uint64), input.ByteCount());
+}
+
+TEST_2D(CodedStreamTest, WriteLittleEndian32, kFixed32Cases, kBlockSizes) {
+ ArrayOutputStream output(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedOutputStream coded_output(&output);
+
+ EXPECT_TRUE(coded_output.WriteLittleEndian32(kFixed32Cases_case.value));
+
+ EXPECT_EQ(sizeof(uint32), coded_output.ByteCount());
+ }
+
+ EXPECT_EQ(sizeof(uint32), output.ByteCount());
+ EXPECT_EQ(0, memcmp(buffer_, kFixed32Cases_case.bytes, sizeof(uint32)));
+}
+
+TEST_2D(CodedStreamTest, WriteLittleEndian64, kFixed64Cases, kBlockSizes) {
+ ArrayOutputStream output(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedOutputStream coded_output(&output);
+
+ EXPECT_TRUE(coded_output.WriteLittleEndian64(kFixed64Cases_case.value));
+
+ EXPECT_EQ(sizeof(uint64), coded_output.ByteCount());
+ }
+
+ EXPECT_EQ(sizeof(uint64), output.ByteCount());
+ EXPECT_EQ(0, memcmp(buffer_, kFixed64Cases_case.bytes, sizeof(uint64)));
+}
+
+// -------------------------------------------------------------------
+// Raw reads and writes
+
+const char kRawBytes[] = "Some bytes which will be writted and read raw.";
+
+TEST_1D(CodedStreamTest, ReadRaw, kBlockSizes) {
+ memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+ ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
+ char read_buffer[sizeof(kRawBytes)];
+
+ {
+ CodedInputStream coded_input(&input);
+
+ EXPECT_TRUE(coded_input.ReadRaw(read_buffer, sizeof(kRawBytes)));
+ EXPECT_EQ(0, memcmp(kRawBytes, read_buffer, sizeof(kRawBytes)));
+ }
+
+ EXPECT_EQ(sizeof(kRawBytes), input.ByteCount());
+}
+
+TEST_1D(CodedStreamTest, WriteRaw, kBlockSizes) {
+ ArrayOutputStream output(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedOutputStream coded_output(&output);
+
+ EXPECT_TRUE(coded_output.WriteRaw(kRawBytes, sizeof(kRawBytes)));
+
+ EXPECT_EQ(sizeof(kRawBytes), coded_output.ByteCount());
+ }
+
+ EXPECT_EQ(sizeof(kRawBytes), output.ByteCount());
+ EXPECT_EQ(0, memcmp(buffer_, kRawBytes, sizeof(kRawBytes)));
+}
+
+TEST_1D(CodedStreamTest, ReadString, kBlockSizes) {
+ memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+ ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedInputStream coded_input(&input);
+
+ string str;
+ EXPECT_TRUE(coded_input.ReadString(&str, strlen(kRawBytes)));
+ EXPECT_EQ(kRawBytes, str);
+ }
+
+ EXPECT_EQ(strlen(kRawBytes), input.ByteCount());
+}
+
+// Check to make sure ReadString doesn't crash on impossibly large strings.
+TEST_1D(CodedStreamTest, ReadStringImpossiblyLarge, kBlockSizes) {
+ ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedInputStream coded_input(&input);
+
+ string str;
+ // Try to read a gigabyte.
+ EXPECT_FALSE(coded_input.ReadString(&str, 1 << 30));
+ }
+}
+
+
+// -------------------------------------------------------------------
+// Skip
+
+const char kSkipTestBytes[] =
+ "<Before skipping><To be skipped><After skipping>";
+const char kSkipOutputTestBytes[] =
+ "-----------------<To be skipped>----------------";
+
+TEST_1D(CodedStreamTest, SkipInput, kBlockSizes) {
+ memcpy(buffer_, kSkipTestBytes, sizeof(kSkipTestBytes));
+ ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedInputStream coded_input(&input);
+
+ string str;
+ EXPECT_TRUE(coded_input.ReadString(&str, strlen("<Before skipping>")));
+ EXPECT_EQ("<Before skipping>", str);
+ EXPECT_TRUE(coded_input.Skip(strlen("<To be skipped>")));
+ EXPECT_TRUE(coded_input.ReadString(&str, strlen("<After skipping>")));
+ EXPECT_EQ("<After skipping>", str);
+ }
+
+ EXPECT_EQ(strlen(kSkipTestBytes), input.ByteCount());
+}
+
+// -------------------------------------------------------------------
+// Limits
+
+TEST_1D(CodedStreamTest, BasicLimit, kBlockSizes) {
+ ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedInputStream coded_input(&input);
+
+ EXPECT_EQ(-1, coded_input.BytesUntilLimit());
+ CodedInputStream::Limit limit = coded_input.PushLimit(8);
+
+ // Read until we hit the limit.
+ uint32 value;
+ EXPECT_EQ(8, coded_input.BytesUntilLimit());
+ EXPECT_TRUE(coded_input.ReadLittleEndian32(&value));
+ EXPECT_EQ(4, coded_input.BytesUntilLimit());
+ EXPECT_TRUE(coded_input.ReadLittleEndian32(&value));
+ EXPECT_EQ(0, coded_input.BytesUntilLimit());
+ EXPECT_FALSE(coded_input.ReadLittleEndian32(&value));
+ EXPECT_EQ(0, coded_input.BytesUntilLimit());
+
+ coded_input.PopLimit(limit);
+
+ EXPECT_EQ(-1, coded_input.BytesUntilLimit());
+ EXPECT_TRUE(coded_input.ReadLittleEndian32(&value));
+ }
+
+ EXPECT_EQ(12, input.ByteCount());
+}
+
+// Test what happens when we push two limits where the second (top) one is
+// shorter.
+TEST_1D(CodedStreamTest, SmallLimitOnTopOfBigLimit, kBlockSizes) {
+ ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedInputStream coded_input(&input);
+
+ EXPECT_EQ(-1, coded_input.BytesUntilLimit());
+ CodedInputStream::Limit limit1 = coded_input.PushLimit(8);
+ EXPECT_EQ(8, coded_input.BytesUntilLimit());
+ CodedInputStream::Limit limit2 = coded_input.PushLimit(4);
+
+ uint32 value;
+
+ // Read until we hit limit2, the top and shortest limit.
+ EXPECT_EQ(4, coded_input.BytesUntilLimit());
+ EXPECT_TRUE(coded_input.ReadLittleEndian32(&value));
+ EXPECT_EQ(0, coded_input.BytesUntilLimit());
+ EXPECT_FALSE(coded_input.ReadLittleEndian32(&value));
+ EXPECT_EQ(0, coded_input.BytesUntilLimit());
+
+ coded_input.PopLimit(limit2);
+
+ // Read until we hit limit1.
+ EXPECT_EQ(4, coded_input.BytesUntilLimit());
+ EXPECT_TRUE(coded_input.ReadLittleEndian32(&value));
+ EXPECT_EQ(0, coded_input.BytesUntilLimit());
+ EXPECT_FALSE(coded_input.ReadLittleEndian32(&value));
+ EXPECT_EQ(0, coded_input.BytesUntilLimit());
+
+ coded_input.PopLimit(limit1);
+
+ // No more limits.
+ EXPECT_EQ(-1, coded_input.BytesUntilLimit());
+ EXPECT_TRUE(coded_input.ReadLittleEndian32(&value));
+ }
+
+ EXPECT_EQ(12, input.ByteCount());
+}
+
+// Test what happens when we push two limits where the second (top) one is
+// longer. In this case, the top limit is shortened to match the previous
+// limit.
+TEST_1D(CodedStreamTest, BigLimitOnTopOfSmallLimit, kBlockSizes) {
+ ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedInputStream coded_input(&input);
+
+ EXPECT_EQ(-1, coded_input.BytesUntilLimit());
+ CodedInputStream::Limit limit1 = coded_input.PushLimit(4);
+ EXPECT_EQ(4, coded_input.BytesUntilLimit());
+ CodedInputStream::Limit limit2 = coded_input.PushLimit(8);
+
+ uint32 value;
+
+ // Read until we hit limit2. Except, wait! limit1 is shorter, so
+ // we end up hitting that first, despite having 4 bytes to go on
+ // limit2.
+ EXPECT_EQ(4, coded_input.BytesUntilLimit());
+ EXPECT_TRUE(coded_input.ReadLittleEndian32(&value));
+ EXPECT_EQ(0, coded_input.BytesUntilLimit());
+ EXPECT_FALSE(coded_input.ReadLittleEndian32(&value));
+ EXPECT_EQ(0, coded_input.BytesUntilLimit());
+
+ coded_input.PopLimit(limit2);
+
+ // OK, popped limit2, now limit1 is on top, which we've already hit.
+ EXPECT_EQ(0, coded_input.BytesUntilLimit());
+ EXPECT_FALSE(coded_input.ReadLittleEndian32(&value));
+ EXPECT_EQ(0, coded_input.BytesUntilLimit());
+
+ coded_input.PopLimit(limit1);
+
+ // No more limits.
+ EXPECT_EQ(-1, coded_input.BytesUntilLimit());
+ EXPECT_TRUE(coded_input.ReadLittleEndian32(&value));
+ }
+
+ EXPECT_EQ(8, input.ByteCount());
+}
+
+TEST_F(CodedStreamTest, ExpectAtEnd) {
+ // Test ExpectAtEnd(), which is based on limits.
+ ArrayInputStream input(buffer_, sizeof(buffer_));
+ CodedInputStream coded_input(&input);
+
+ EXPECT_FALSE(coded_input.ExpectAtEnd());
+
+ CodedInputStream::Limit limit = coded_input.PushLimit(4);
+
+ uint32 value;
+ EXPECT_TRUE(coded_input.ReadLittleEndian32(&value));
+ EXPECT_TRUE(coded_input.ExpectAtEnd());
+
+ coded_input.PopLimit(limit);
+ EXPECT_FALSE(coded_input.ExpectAtEnd());
+}
+
+TEST_F(CodedStreamTest, NegativeLimit) {
+ // Check what happens when we push a negative limit.
+ ArrayInputStream input(buffer_, sizeof(buffer_));
+ CodedInputStream coded_input(&input);
+
+ CodedInputStream::Limit limit = coded_input.PushLimit(-1234);
+ // BytesUntilLimit() returns -1 to mean "no limit", which actually means
+ // "the limit is INT_MAX relative to the beginning of the stream".
+ EXPECT_EQ(-1, coded_input.BytesUntilLimit());
+ coded_input.PopLimit(limit);
+}
+
+TEST_F(CodedStreamTest, NegativeLimitAfterReading) {
+ // Check what happens when we push a negative limit.
+ ArrayInputStream input(buffer_, sizeof(buffer_));
+ CodedInputStream coded_input(&input);
+ ASSERT_TRUE(coded_input.Skip(128));
+
+ CodedInputStream::Limit limit = coded_input.PushLimit(-64);
+ // BytesUntilLimit() returns -1 to mean "no limit", which actually means
+ // "the limit is INT_MAX relative to the beginning of the stream".
+ EXPECT_EQ(-1, coded_input.BytesUntilLimit());
+ coded_input.PopLimit(limit);
+}
+
+TEST_F(CodedStreamTest, OverflowLimit) {
+ // Check what happens when we push a limit large enough that its absolute
+ // position is more than 2GB into the stream.
+ ArrayInputStream input(buffer_, sizeof(buffer_));
+ CodedInputStream coded_input(&input);
+ ASSERT_TRUE(coded_input.Skip(128));
+
+ CodedInputStream::Limit limit = coded_input.PushLimit(INT_MAX);
+ // BytesUntilLimit() returns -1 to mean "no limit", which actually means
+ // "the limit is INT_MAX relative to the beginning of the stream".
+ EXPECT_EQ(-1, coded_input.BytesUntilLimit());
+ coded_input.PopLimit(limit);
+}
+
+TEST_F(CodedStreamTest, TotalBytesLimit) {
+ ArrayInputStream input(buffer_, sizeof(buffer_));
+ CodedInputStream coded_input(&input);
+ coded_input.SetTotalBytesLimit(16, -1);
+
+ string str;
+ EXPECT_TRUE(coded_input.ReadString(&str, 16));
+
+ vector<string> errors;
+
+ {
+ ScopedMemoryLog error_log;
+ EXPECT_FALSE(coded_input.ReadString(&str, 1));
+ errors = error_log.GetMessages(ERROR);
+ }
+
+ ASSERT_EQ(1, errors.size());
+ EXPECT_PRED_FORMAT2(testing::IsSubstring,
+ "A protocol message was rejected because it was too big", errors[0]);
+
+ coded_input.SetTotalBytesLimit(32, -1);
+ EXPECT_TRUE(coded_input.ReadString(&str, 16));
+}
+
+TEST_F(CodedStreamTest, TotalBytesLimitNotValidMessageEnd) {
+ // total_bytes_limit_ is not a valid place for a message to end.
+
+ ArrayInputStream input(buffer_, sizeof(buffer_));
+ CodedInputStream coded_input(&input);
+
+ // Set both total_bytes_limit and a regular limit at 16 bytes.
+ coded_input.SetTotalBytesLimit(16, -1);
+ CodedInputStream::Limit limit = coded_input.PushLimit(16);
+
+ // Read 16 bytes.
+ string str;
+ EXPECT_TRUE(coded_input.ReadString(&str, 16));
+
+ // Read a tag. Should fail, but report being a valid endpoint since it's
+ // a regular limit.
+ EXPECT_EQ(0, coded_input.ReadTag());
+ EXPECT_TRUE(coded_input.ConsumedEntireMessage());
+
+ // Pop the limit.
+ coded_input.PopLimit(limit);
+
+ // Read a tag. Should fail, and report *not* being a valid endpoint, since
+ // this time we're hitting the total bytes limit.
+ EXPECT_EQ(0, coded_input.ReadTag());
+ EXPECT_FALSE(coded_input.ConsumedEntireMessage());
+}
+
+TEST_F(CodedStreamTest, RecursionLimit) {
+ ArrayInputStream input(buffer_, sizeof(buffer_));
+ CodedInputStream coded_input(&input);
+ coded_input.SetRecursionLimit(4);
+
+ // This is way too much testing for a counter.
+ EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 1
+ EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 2
+ EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 3
+ EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 4
+ EXPECT_FALSE(coded_input.IncrementRecursionDepth()); // 5
+ EXPECT_FALSE(coded_input.IncrementRecursionDepth()); // 6
+ coded_input.DecrementRecursionDepth(); // 5
+ EXPECT_FALSE(coded_input.IncrementRecursionDepth()); // 6
+ coded_input.DecrementRecursionDepth(); // 5
+ coded_input.DecrementRecursionDepth(); // 4
+ coded_input.DecrementRecursionDepth(); // 3
+ EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 4
+ EXPECT_FALSE(coded_input.IncrementRecursionDepth()); // 5
+ coded_input.DecrementRecursionDepth(); // 4
+ coded_input.DecrementRecursionDepth(); // 3
+ coded_input.DecrementRecursionDepth(); // 2
+ coded_input.DecrementRecursionDepth(); // 1
+ coded_input.DecrementRecursionDepth(); // 0
+ coded_input.DecrementRecursionDepth(); // 0
+ coded_input.DecrementRecursionDepth(); // 0
+ EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 1
+ EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 2
+ EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 3
+ EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 4
+ EXPECT_FALSE(coded_input.IncrementRecursionDepth()); // 5
+
+ coded_input.SetRecursionLimit(6);
+ EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 6
+ EXPECT_FALSE(coded_input.IncrementRecursionDepth()); // 7
+}
+
+class ReallyBigInputStream : public ZeroCopyInputStream {
+ public:
+ ReallyBigInputStream() : backup_amount_(0), buffer_count_(0) {}
+ ~ReallyBigInputStream() {}
+
+ // implements ZeroCopyInputStream ----------------------------------
+ bool Next(const void** data, int* size) {
+ // We only expect BackUp() to be called at the end.
+ EXPECT_EQ(0, backup_amount_);
+
+ switch (buffer_count_++) {
+ case 0:
+ *data = buffer_;
+ *size = sizeof(buffer_);
+ return true;
+ case 1:
+ // Return an enormously large buffer that, when combined with the 1k
+ // returned already, should overflow the total_bytes_read_ counter in
+ // CodedInputStream. Note that we'll only read the first 1024 bytes
+ // of this buffer so it's OK that we have it point at buffer_.
+ *data = buffer_;
+ *size = INT_MAX;
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ void BackUp(int count) {
+ backup_amount_ = count;
+ }
+
+ bool Skip(int count) { GOOGLE_LOG(FATAL) << "Not implemented."; return false; }
+ int64 ByteCount() const { GOOGLE_LOG(FATAL) << "Not implemented."; return 0; }
+
+ int backup_amount_;
+
+ private:
+ char buffer_[1024];
+ int64 buffer_count_;
+};
+
+TEST_F(CodedStreamTest, InputOver2G) {
+ // CodedInputStream should gracefully handle input over 2G and call
+ // input.BackUp() with the correct number of bytes on destruction.
+ ReallyBigInputStream input;
+
+ vector<string> errors;
+
+ {
+ ScopedMemoryLog error_log;
+ CodedInputStream coded_input(&input);
+ string str;
+ EXPECT_TRUE(coded_input.ReadString(&str, 512));
+ EXPECT_TRUE(coded_input.ReadString(&str, 1024));
+ errors = error_log.GetMessages(ERROR);
+ }
+
+ EXPECT_EQ(INT_MAX - 512, input.backup_amount_);
+ EXPECT_EQ(0, errors.size());
+}
+
+// ===================================================================
+
+
+} // namespace
+} // namespace io
+} // namespace protobuf
+} // namespace google
diff --git a/src/google/protobuf/io/package_info.h b/src/google/protobuf/io/package_info.h
new file mode 100644
index 00000000..8272d51d
--- /dev/null
+++ b/src/google/protobuf/io/package_info.h
@@ -0,0 +1,40 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.
+// http://code.google.com/p/protobuf/
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+//
+// This file exists solely to document the google::protobuf::io namespace.
+// It is not compiled into anything, but it may be read by an automated
+// documentation generator.
+
+namespace google {
+
+namespace protobuf {
+
+// Auxiliary classes used for I/O.
+//
+// The Protocol Buffer library uses the classes in this package to deal with
+// I/O and encoding/decoding raw bytes. Most users will not need to
+// deal with this package. However, users who want to adapt the system to
+// work with their own I/O abstractions -- e.g., to allow Protocol Buffers
+// to be read from a different kind of input stream without the need for a
+// temporary buffer -- should take a closer look.
+namespace io {}
+
+} // namespace protobuf
+} // namespace google
diff --git a/src/google/protobuf/io/printer.cc b/src/google/protobuf/io/printer.cc
new file mode 100644
index 00000000..7b3e3de4
--- /dev/null
+++ b/src/google/protobuf/io/printer.cc
@@ -0,0 +1,165 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.
+// http://code.google.com/p/protobuf/
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+
+#include <google/protobuf/io/printer.h>
+#include <google/protobuf/io/zero_copy_stream.h>
+#include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/strutil.h>
+
+namespace google {
+namespace protobuf {
+namespace io {
+
+Printer::Printer(ZeroCopyOutputStream* output, char variable_delimiter)
+ : variable_delimiter_(variable_delimiter),
+ output_(output),
+ buffer_(NULL),
+ buffer_size_(0),
+ at_start_of_line_(true),
+ failed_(false) {
+}
+
+Printer::~Printer() {
+ // Only BackUp() if we're sure we've successfully called Next() at least once.
+ if (buffer_size_ > 0) {
+ output_->BackUp(buffer_size_);
+ }
+}
+
+void Printer::Print(const map<string, string>& variables, const char* text) {
+ int size = strlen(text);
+ int pos = 0; // The number of bytes we've written so far.
+
+ for (int i = 0; i < size; i++) {
+ if (text[i] == '\n') {
+ // Saw newline. If there is more text, we may need to insert an indent
+ // here. So, write what we have so far, including the '\n'.
+ Write(text + pos, i - pos + 1);
+ pos = i + 1;
+
+ // Setting this true will cause the next Write() to insert an indent
+ // first.
+ at_start_of_line_ = true;
+
+ } else if (text[i] == variable_delimiter_) {
+ // Saw the start of a variable name.
+
+ // Write what we have so far.
+ Write(text + pos, i - pos);
+ pos = i + 1;
+
+ // Find closing delimiter.
+ const char* end = strchr(text + pos, variable_delimiter_);
+ if (end == NULL) {
+ GOOGLE_LOG(DFATAL) << " Unclosed variable name.";
+ end = text + pos;
+ }
+ int endpos = end - text;
+
+ string varname(text + pos, endpos - pos);
+ if (varname.empty()) {
+ // Two delimiters in a row reduce to a literal delimiter character.
+ Write(&variable_delimiter_, 1);
+ } else {
+ // Replace with the variable's value.
+ map<string, string>::const_iterator iter = variables.find(varname);
+ if (iter == variables.end()) {
+ GOOGLE_LOG(DFATAL) << " Undefined variable: " << varname;
+ } else {
+ Write(iter->second.data(), iter->second.size());
+ }
+ }
+
+ // Advance past this variable.
+ i = endpos;
+ pos = endpos + 1;
+ }
+ }
+
+ // Write the rest.
+ Write(text + pos, size - pos);
+}
+
+void Printer::Print(const char* text) {
+ static map<string, string> empty;
+ Print(empty, text);
+}
+
+void Printer::Print(const char* text,
+ const char* variable, const string& value) {
+ map<string, string> vars;
+ vars[variable] = value;
+ Print(vars, text);
+}
+
+void Printer::Print(const char* text,
+ const char* variable1, const string& value1,
+ const char* variable2, const string& value2) {
+ map<string, string> vars;
+ vars[variable1] = value1;
+ vars[variable2] = value2;
+ Print(vars, text);
+}
+
+void Printer::Indent() {
+ indent_ += " ";
+}
+
+void Printer::Outdent() {
+ if (indent_.empty()) {
+ GOOGLE_LOG(DFATAL) << " Outdent() without matching Indent().";
+ return;
+ }
+
+ indent_.resize(indent_.size() - 2);
+}
+
+void Printer::Write(const char* data, int size) {
+ if (failed_) return;
+ if (size == 0) return;
+
+ if (at_start_of_line_) {
+ // Insert an indent.
+ at_start_of_line_ = false;
+ Write(indent_.data(), indent_.size());
+ if (failed_) return;
+ }
+
+ while (size > buffer_size_) {
+ // Data exceeds space in the buffer. Copy what we can and request a
+ // new buffer.
+ memcpy(buffer_, data, buffer_size_);
+ data += buffer_size_;
+ size -= buffer_size_;
+ void* void_buffer;
+ failed_ = !output_->Next(&void_buffer, &buffer_size_);
+ if (failed_) return;
+ buffer_ = reinterpret_cast<char*>(void_buffer);
+ }
+
+ // Buffer is big enough to receive the data; copy it.
+ memcpy(buffer_, data, size);
+ buffer_ += size;
+ buffer_size_ -= size;
+}
+
+} // namespace io
+} // namespace protobuf
+} // namespace google
diff --git a/src/google/protobuf/io/printer.h b/src/google/protobuf/io/printer.h
new file mode 100644
index 00000000..ee8f46c8
--- /dev/null
+++ b/src/google/protobuf/io/printer.h
@@ -0,0 +1,109 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.
+// http://code.google.com/p/protobuf/
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+//
+// Utility class for writing text to a ZeroCopyOutputStream.
+
+#ifndef GOOGLE_PROTOBUF_IO_PRINTER_H__
+#define GOOGLE_PROTOBUF_IO_PRINTER_H__
+
+#include <string>
+#include <map>
+#include <google/protobuf/stubs/common.h>
+
+namespace google {
+namespace protobuf {
+namespace io {
+
+class ZeroCopyOutputStream; // zero_copy_stream.h
+
+// This simple utility class assists in code generation. It basically
+// allows the caller to define a set of variables and then output some
+// text with variable substitutions. Example usage:
+//
+// Printer printer(output, '$');
+// map<string, string> vars;
+// vars["name"] = "Bob";
+// printer.Print(vars, "My name is $name$.");
+//
+// The above writes "My name is Bob." to the output stream.
+//
+// Printer aggressively enforces correct usage, crashing (with assert failures)
+// in the case of undefined variables. This helps greatly in debugging code
+// which uses it. This class is not intended to be used by production servers.
+class LIBPROTOBUF_EXPORT Printer {
+ public:
+ // Create a printer that writes text to the given output stream. Use the
+ // given character as the delimiter for variables.
+ Printer(ZeroCopyOutputStream* output, char variable_delimiter);
+ ~Printer();
+
+ // Print some text after applying variable substitutions. If a particular
+ // variable in the text is not defined, this will crash. Variables to be
+ // substituted are identified by their names surrounded by delimiter
+ // characters (as given to the constructor). The variable bindings are
+ // defined by the given map.
+ void Print(const map<string, string>& variables, const char* text);
+
+ // Like the first Print(), except the substitutions are given as parameters.
+ void Print(const char* text);
+ // Like the first Print(), except the substitutions are given as parameters.
+ void Print(const char* text, const char* variable, const string& value);
+ // Like the first Print(), except the substitutions are given as parameters.
+ void Print(const char* text, const char* variable1, const string& value1,
+ const char* variable2, const string& value2);
+ // TODO(kenton): Overloaded versions with more variables? Two seems
+ // to be enough.
+
+ // Indent text by two spaces. After calling Indent(), two spaces will be
+ // inserted at the beginning of each line of text. Indent() may be called
+ // multiple times to produce deeper indents.
+ void Indent();
+
+ // Reduces the current indent level by two spaces, or crashes if the indent
+ // level is zero.
+ void Outdent();
+
+ // True if any write to the underlying stream failed. (We don't just
+ // crash in this case because this is an I/O failure, not a programming
+ // error.)
+ bool failed() const { return failed_; }
+
+ private:
+ // Write some text to the output buffer.
+ void Write(const char* data, int size);
+
+ const char variable_delimiter_;
+
+ ZeroCopyOutputStream* const output_;
+ char* buffer_;
+ int buffer_size_;
+
+ string indent_;
+ bool at_start_of_line_;
+ bool failed_;
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Printer);
+};
+
+} // namespace io
+} // namespace protobuf
+
+} // namespace google
+#endif // GOOGLE_PROTOBUF_IO_PRINTER_H__
diff --git a/src/google/protobuf/io/printer_unittest.cc b/src/google/protobuf/io/printer_unittest.cc
new file mode 100644
index 00000000..652728bc
--- /dev/null
+++ b/src/google/protobuf/io/printer_unittest.cc
@@ -0,0 +1,210 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.
+// http://code.google.com/p/protobuf/
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+
+#include <vector>
+
+#include <google/protobuf/io/printer.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+
+#include <google/protobuf/stubs/common.h>
+#include <google/protobuf/testing/googletest.h>
+#include <gtest/gtest.h>
+
+namespace google {
+namespace protobuf {
+namespace io {
+namespace {
+
+// Each test repeats over several block sizes in order to test both cases
+// where particular writes cross a buffer boundary and cases where they do
+// not.
+
+TEST(Printer, EmptyPrinter) {
+ char buffer[8192];
+ const int block_size = 100;
+ ArrayOutputStream output(buffer, GOOGLE_ARRAYSIZE(buffer), block_size);
+ Printer printer(&output, '\0');
+ EXPECT_TRUE(!printer.failed());
+}
+
+TEST(Printer, BasicPrinting) {
+ char buffer[8192];
+
+ for (int block_size = 1; block_size < 512; block_size *= 2) {
+ ArrayOutputStream output(buffer, sizeof(buffer), block_size);
+
+ {
+ Printer printer(&output, '\0');
+
+ printer.Print("Hello World!");
+ printer.Print(" This is the same line.\n");
+ printer.Print("But this is a new one.\nAnd this is another one.");
+
+ EXPECT_FALSE(printer.failed());
+ }
+
+ buffer[output.ByteCount()] = '\0';
+
+ EXPECT_STREQ(buffer,
+ "Hello World! This is the same line.\n"
+ "But this is a new one.\n"
+ "And this is another one.");
+ }
+}
+
+TEST(Printer, VariableSubstitution) {
+ char buffer[8192];
+
+ for (int block_size = 1; block_size < 512; block_size *= 2) {
+ ArrayOutputStream output(buffer, sizeof(buffer), block_size);
+
+ {
+ Printer printer(&output, '$');
+ map<string, string> vars;
+
+ vars["foo"] = "World";
+ vars["bar"] = "$foo$";
+ vars["abcdefg"] = "1234";
+
+ printer.Print(vars, "Hello $foo$!\nbar = $bar$\n");
+ printer.Print(vars, "$abcdefg$\nA literal dollar sign: $$");
+
+ vars["foo"] = "blah";
+ printer.Print(vars, "\nNow foo = $foo$.");
+
+ EXPECT_FALSE(printer.failed());
+ }
+
+ buffer[output.ByteCount()] = '\0';
+
+ EXPECT_STREQ(buffer,
+ "Hello World!\n"
+ "bar = $foo$\n"
+ "1234\n"
+ "A literal dollar sign: $\n"
+ "Now foo = blah.");
+ }
+}
+
+TEST(Printer, InlineVariableSubstitution) {
+ char buffer[8192];
+
+ ArrayOutputStream output(buffer, sizeof(buffer));
+
+ {
+ Printer printer(&output, '$');
+ printer.Print("Hello $foo$!\n", "foo", "World");
+ printer.Print("$foo$ $bar$\n", "foo", "one", "bar", "two");
+ EXPECT_FALSE(printer.failed());
+ }
+
+ buffer[output.ByteCount()] = '\0';
+
+ EXPECT_STREQ(buffer,
+ "Hello World!\n"
+ "one two\n");
+}
+
+TEST(Printer, Indenting) {
+ char buffer[8192];
+
+ for (int block_size = 1; block_size < 512; block_size *= 2) {
+ ArrayOutputStream output(buffer, sizeof(buffer), block_size);
+
+ {
+ Printer printer(&output, '$');
+ map<string, string> vars;
+
+ vars["newline"] = "\n";
+
+ printer.Print("This is not indented.\n");
+ printer.Indent();
+ printer.Print("This is indented\nAnd so is this\n");
+ printer.Outdent();
+ printer.Print("But this is not.");
+ printer.Indent();
+ printer.Print(" And this is still the same line.\n"
+ "But this is indented.\n");
+ printer.Print(vars, "Note that a newline in a variable will break "
+ "indenting, as we see$newline$here.\n");
+ printer.Indent();
+ printer.Print("And this");
+ printer.Outdent();
+ printer.Outdent();
+ printer.Print(" is double-indented\nBack to normal.");
+
+ EXPECT_FALSE(printer.failed());
+ }
+
+ buffer[output.ByteCount()] = '\0';
+
+ EXPECT_STREQ(buffer,
+ "This is not indented.\n"
+ " This is indented\n"
+ " And so is this\n"
+ "But this is not. And this is still the same line.\n"
+ " But this is indented.\n"
+ " Note that a newline in a variable will break indenting, as we see\n"
+ "here.\n"
+ " And this is double-indented\n"
+ "Back to normal.");
+ }
+}
+
+// Death tests do not work on Windows as of yet.
+#ifdef GTEST_HAS_DEATH_TEST
+TEST(Printer, Death) {
+ char buffer[8192];
+
+ ArrayOutputStream output(buffer, sizeof(buffer));
+ Printer printer(&output, '$');
+
+ EXPECT_DEBUG_DEATH(printer.Print("$nosuchvar$"), "Undefined variable");
+ EXPECT_DEBUG_DEATH(printer.Print("$unclosed"), "Unclosed variable name");
+ EXPECT_DEBUG_DEATH(printer.Outdent(), "without matching Indent");
+}
+#endif // GTEST_HAS_DEATH_TEST
+
+TEST(Printer, WriteFailure) {
+ char buffer[16];
+
+ ArrayOutputStream output(buffer, sizeof(buffer));
+ Printer printer(&output, '$');
+
+ // Print 16 bytes to fill the buffer exactly (should not fail).
+ printer.Print("0123456789abcdef");
+ EXPECT_FALSE(printer.failed());
+
+ // Try to print one more byte (should fail).
+ printer.Print(" ");
+ EXPECT_TRUE(printer.failed());
+
+ // Should not crash
+ printer.Print("blah");
+ EXPECT_TRUE(printer.failed());
+
+ // Buffer should contain the first 16 bytes written.
+ EXPECT_EQ("0123456789abcdef", string(buffer, sizeof(buffer)));
+}
+
+} // namespace
+} // namespace io
+} // namespace protobuf
+} // namespace google
diff --git a/src/google/protobuf/io/tokenizer.cc b/src/google/protobuf/io/tokenizer.cc
new file mode 100644
index 00000000..3864fcfb
--- /dev/null
+++ b/src/google/protobuf/io/tokenizer.cc
@@ -0,0 +1,679 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.
+// http://code.google.com/p/protobuf/
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+//
+// Here we have a hand-written lexer. At first you might ask yourself,
+// "Hand-written text processing? Is Kenton crazy?!" Well, first of all,
+// yes I am crazy, but that's beside the point. There are actually reasons
+// why I ended up writing this this way.
+//
+// The traditional approach to lexing is to use lex to generate a lexer for
+// you. Unfortunately, lex's output is ridiculously ugly and difficult to
+// integrate cleanly with C++ code, especially abstract code or code meant
+// as a library. Better parser-generators exist but would add dependencies
+// which most users won't already have, which we'd like to avoid. (GNU flex
+// has a C++ output option, but it's still ridiculously ugly, non-abstract,
+// and not library-friendly.)
+//
+// The next approach that any good software engineer should look at is to
+// use regular expressions. And, indeed, I did. I have code which
+// implements this same class using regular expressions. It's about 200
+// lines shorter. However:
+// - Rather than error messages telling you "This string has an invalid
+// escape sequence at line 5, column 45", you get error messages like
+// "Parse error on line 5". Giving more precise errors requires adding
+// a lot of code that ends up basically as complex as the hand-coded
+// version anyway.
+// - The regular expression to match a string literal looks like this:
+// kString = new RE("(\"([^\"\\\\]|" // non-escaped
+// "\\\\[abfnrtv?\"'\\\\0-7]|" // normal escape
+// "\\\\x[0-9a-fA-F])*\"|" // hex escape
+// "\'([^\'\\\\]|" // Also support single-quotes.
+// "\\\\[abfnrtv?\"'\\\\0-7]|"
+// "\\\\x[0-9a-fA-F])*\')");
+// Verifying the correctness of this line noise is actually harder than
+// verifying the correctness of ConsumeString(), defined below. I'm not
+// even confident that the above is correct, after staring at it for some
+// time.
+// - PCRE is fast, but there's still more overhead involved than the code
+// below.
+// - Sadly, regular expressions are not part of the C standard library, so
+// using them would require depending on some other library. For the
+// open source release, this could be really annoying. Nobody likes
+// downloading one piece of software just to find that they need to
+// download something else to make it work, and in all likelihood
+// people downloading Protocol Buffers will already be doing so just
+// to make something else work. We could include a copy of PCRE with
+// our code, but that obligates us to keep it up-to-date and just seems
+// like a big waste just to save 200 lines of code.
+//
+// On a similar but unrelated note, I'm even scared to use ctype.h.
+// Apparently functions like isalpha() are locale-dependent. So, if we used
+// that, then if this code is being called from some program that doesn't
+// have its locale set to "C", it would behave strangely. We can't just set
+// the locale to "C" ourselves since we might break the calling program that
+// way, particularly if it is multi-threaded. WTF? Someone please let me
+// (Kenton) know if I'm missing something here...
+//
+// I'd love to hear about other alternatives, though, as this code isn't
+// exactly pretty.
+
+#include <google/protobuf/io/tokenizer.h>
+#include <google/protobuf/io/zero_copy_stream.h>
+#include <google/protobuf/stubs/strutil.h>
+
+namespace google {
+namespace protobuf {
+namespace io {
+namespace {
+
+// As mentioned above, I don't trust ctype.h due to the presence of "locales".
+// So, I have written replacement functions here. Someone please smack me if
+// this is a bad idea or if there is some way around this.
+//
+// These "character classes" are designed to be used in template methods.
+// For instance, Tokenizer::ConsumeZeroOrMore<Whitespace>() will eat
+// whitespace.
+
+// Note: No class is allowed to contain '\0', since this is used to mark end-
+// of-input and is handled specially.
+
+#define CHARACTER_CLASS(NAME, EXPRESSION) \
+ class NAME { \
+ public: \
+ static inline bool InClass(char c) { \
+ return EXPRESSION; \
+ } \
+ }
+
+CHARACTER_CLASS(Whitespace, c == ' ' || c == '\n' || c == '\t' ||
+ c == '\r' || c == '\v');
+
+CHARACTER_CLASS(Unprintable, c < ' ' && c != '\0');
+
+CHARACTER_CLASS(Digit, '0' <= c && c <= '9');
+CHARACTER_CLASS(OctalDigit, '0' <= c && c <= '7');
+CHARACTER_CLASS(HexDigit, ('0' <= c && c <= '9') ||
+ ('a' <= c && c <= 'f') ||
+ ('A' <= c && c <= 'F'));
+
+CHARACTER_CLASS(Letter, ('a' <= c && c <= 'z') ||
+ ('A' <= c && c <= 'Z') ||
+ (c == '_'));
+
+CHARACTER_CLASS(Alphanumeric, ('a' <= c && c <= 'z') ||
+ ('A' <= c && c <= 'Z') ||
+ ('0' <= c && c <= '9') ||
+ (c == '_'));
+
+CHARACTER_CLASS(Escape, c == 'a' || c == 'b' || c == 'f' || c == 'n' ||
+ c == 'r' || c == 't' || c == 'v' || c == '\\' ||
+ c == '?' || c == '\'' || c == '\"');
+
+#undef CHARACTER_CLASS
+
+// Given a char, interpret it as a numeric digit and return its value.
+// This supports any number base up to 36.
+inline int DigitValue(char digit) {
+ if ('0' <= digit && digit <= '9') return digit - '0';
+ if ('a' <= digit && digit <= 'z') return digit - 'a' + 10;
+ if ('A' <= digit && digit <= 'Z') return digit - 'A' + 10;
+ return -1;
+}
+
+// Inline because it's only used in one place.
+inline char TranslateEscape(char c) {
+ switch (c) {
+ case 'a': return '\a';
+ case 'b': return '\b';
+ case 'f': return '\f';
+ case 'n': return '\n';
+ case 'r': return '\r';
+ case 't': return '\t';
+ case 'v': return '\v';
+ case '\\': return '\\';
+ case '?': return '\?'; // Trigraphs = :(
+ case '\'': return '\'';
+ case '"': return '\"';
+
+ // We expect escape sequences to have been validated separately.
+ default: return '?';
+ }
+}
+
+} // anonymous namespace
+
+ErrorCollector::~ErrorCollector() {}
+
+// ===================================================================
+
+Tokenizer::Tokenizer(ZeroCopyInputStream* input,
+ ErrorCollector* error_collector)
+ : input_(input),
+ error_collector_(error_collector),
+ buffer_(NULL),
+ buffer_size_(0),
+ buffer_pos_(0),
+ read_error_(false),
+ line_(0),
+ column_(0),
+ token_start_(-1),
+ allow_f_after_float_(false),
+ comment_style_(CPP_COMMENT_STYLE) {
+
+ current_.line = 0;
+ current_.column = 0;
+ current_.type = TYPE_START;
+
+ Refresh();
+}
+
+Tokenizer::~Tokenizer() {
+ // If we had any buffer left unread, return it to the underlying stream
+ // so that someone else can read it.
+ if (buffer_size_ > buffer_pos_) {
+ input_->BackUp(buffer_size_ - buffer_pos_);
+ }
+}
+
+// -------------------------------------------------------------------
+// Internal helpers.
+
+void Tokenizer::NextChar() {
+ // Update our line and column counters based on the character being
+ // consumed.
+ if (current_char_ == '\n') {
+ ++line_;
+ column_ = 0;
+ } else if (current_char_ == '\t') {
+ column_ += kTabWidth - column_ % kTabWidth;
+ } else {
+ ++column_;
+ }
+
+ // Advance to the next character.
+ ++buffer_pos_;
+ if (buffer_pos_ < buffer_size_) {
+ current_char_ = buffer_[buffer_pos_];
+ } else {
+ Refresh();
+ }
+}
+
+void Tokenizer::Refresh() {
+ if (read_error_) {
+ current_char_ = '\0';
+ return;
+ }
+
+ // If we're in a token, append the rest of the buffer to it.
+ if (token_start_ >= 0 && token_start_ < buffer_size_) {
+ current_.text.append(buffer_ + token_start_, buffer_size_ - token_start_);
+ token_start_ = 0;
+ }
+
+ const void* data = NULL;
+ buffer_ = NULL;
+ buffer_pos_ = 0;
+ do {
+ if (!input_->Next(&data, &buffer_size_)) {
+ // end of stream (or read error)
+ buffer_size_ = 0;
+ read_error_ = true;
+ current_char_ = '\0';
+ return;
+ }
+ } while (buffer_size_ == 0);
+
+ buffer_ = static_cast<const char*>(data);
+
+ current_char_ = buffer_[0];
+}
+
+inline void Tokenizer::StartToken() {
+ token_start_ = buffer_pos_;
+ current_.type = TYPE_START; // Just for the sake of initializing it.
+ current_.text.clear();
+ current_.line = line_;
+ current_.column = column_;
+}
+
+inline void Tokenizer::EndToken() {
+ // Note: The if() is necessary because some STL implementations crash when
+ // you call string::append(NULL, 0), presumably because they are trying to
+ // be helpful by detecting the NULL pointer, even though there's nothing
+ // wrong with reading zero bytes from NULL.
+ if (buffer_pos_ != token_start_) {
+ current_.text.append(buffer_ + token_start_, buffer_pos_ - token_start_);
+ }
+ token_start_ = -1;
+}
+
+// -------------------------------------------------------------------
+// Helper methods that consume characters.
+
+template<typename CharacterClass>
+inline bool Tokenizer::LookingAt() {
+ return CharacterClass::InClass(current_char_);
+}
+
+template<typename CharacterClass>
+inline bool Tokenizer::TryConsumeOne() {
+ if (CharacterClass::InClass(current_char_)) {
+ NextChar();
+ return true;
+ } else {
+ return false;
+ }
+}
+
+inline bool Tokenizer::TryConsume(char c) {
+ if (current_char_ == c) {
+ NextChar();
+ return true;
+ } else {
+ return false;
+ }
+}
+
+template<typename CharacterClass>
+inline void Tokenizer::ConsumeZeroOrMore() {
+ while (CharacterClass::InClass(current_char_)) {
+ NextChar();
+ }
+}
+
+template<typename CharacterClass>
+inline void Tokenizer::ConsumeOneOrMore(const char* error) {
+ if (!CharacterClass::InClass(current_char_)) {
+ AddError(error);
+ } else {
+ do {
+ NextChar();
+ } while (CharacterClass::InClass(current_char_));
+ }
+}
+
+// -------------------------------------------------------------------
+// Methods that read whole patterns matching certain kinds of tokens
+// or comments.
+
+void Tokenizer::ConsumeString(char delimiter) {
+ while (true) {
+ switch (current_char_) {
+ case '\0':
+ case '\n': {
+ AddError("String literals cannot cross line boundaries.");
+ return;
+ }
+
+ case '\\': {
+ // An escape sequence.
+ NextChar();
+ if (TryConsumeOne<Escape>()) {
+ // Valid escape sequence.
+ } else if (TryConsumeOne<OctalDigit>()) {
+ // Possibly followed by two more octal digits, but these will
+ // just be consumed by the main loop anyway so we don't need
+ // to do so explicitly here.
+ } else if (TryConsume('x') || TryConsume('X')) {
+ if (!TryConsumeOne<HexDigit>()) {
+ AddError("Expected hex digits for escape sequence.");
+ }
+ // Possibly followed by another hex digit, but again we don't care.
+ } else {
+ AddError("Invalid escape sequence in string literal.");
+ }
+ break;
+ }
+
+ default: {
+ if (current_char_ == delimiter) {
+ NextChar();
+ return;
+ }
+ NextChar();
+ break;
+ }
+ }
+ }
+}
+
+Tokenizer::TokenType Tokenizer::ConsumeNumber(bool started_with_zero,
+ bool started_with_dot) {
+ bool is_float = false;
+
+ if (started_with_zero && (TryConsume('x') || TryConsume('X'))) {
+ // A hex number (started with "0x").
+ ConsumeOneOrMore<HexDigit>("\"0x\" must be followed by hex digits.");
+
+ } else if (started_with_zero && LookingAt<Digit>()) {
+ // An octal number (had a leading zero).
+ ConsumeZeroOrMore<OctalDigit>();
+ if (LookingAt<Digit>()) {
+ AddError("Numbers starting with leading zero must be in octal.");
+ ConsumeZeroOrMore<Digit>();
+ }
+
+ } else {
+ // A decimal number.
+ if (started_with_dot) {
+ is_float = true;
+ ConsumeZeroOrMore<Digit>();
+ } else {
+ ConsumeZeroOrMore<Digit>();
+
+ if (TryConsume('.')) {
+ is_float = true;
+ ConsumeZeroOrMore<Digit>();
+ }
+ }
+
+ if (TryConsume('e') || TryConsume('E')) {
+ is_float = true;
+ TryConsume('-') || TryConsume('+');
+ ConsumeOneOrMore<Digit>("\"e\" must be followed by exponent.");
+ }
+
+ if (allow_f_after_float_ && (TryConsume('f') || TryConsume('F'))) {
+ is_float = true;
+ }
+ }
+
+ if (LookingAt<Letter>()) {
+ AddError("Need space between number and identifier.");
+ } else if (current_char_ == '.') {
+ if (is_float) {
+ AddError(
+ "Already saw decimal point or exponent; can't have another one.");
+ } else {
+ AddError("Hex and octal numbers must be integers.");
+ }
+ }
+
+ return is_float ? TYPE_FLOAT : TYPE_INTEGER;
+}
+
+void Tokenizer::ConsumeLineComment() {
+ while (current_char_ != '\0' && current_char_ != '\n') {
+ NextChar();
+ }
+ TryConsume('\n');
+}
+
+void Tokenizer::ConsumeBlockComment() {
+ int start_line = line_;
+ int start_column = column_ - 2;
+
+ while (true) {
+ while (current_char_ != '\0' &&
+ current_char_ != '*' &&
+ current_char_ != '/') {
+ NextChar();
+ }
+
+ if (TryConsume('*') && TryConsume('/')) {
+ // End of comment.
+ break;
+ } else if (TryConsume('/') && current_char_ == '*') {
+ // Note: We didn't consume the '*' because if there is a '/' after it
+ // we want to interpret that as the end of the comment.
+ AddError(
+ "\"/*\" inside block comment. Block comments cannot be nested.");
+ } else if (current_char_ == '\0') {
+ AddError("End-of-file inside block comment.");
+ error_collector_->AddError(
+ start_line, start_column, " Comment started here.");
+ break;
+ }
+ }
+}
+
+// -------------------------------------------------------------------
+
+bool Tokenizer::Next() {
+ TokenType last_token_type = current_.type;
+
+ // Did we skip any characters after the last token?
+ bool skipped_stuff = false;
+
+ while (!read_error_) {
+ if (TryConsumeOne<Whitespace>()) {
+ ConsumeZeroOrMore<Whitespace>();
+
+ } else if (comment_style_ == CPP_COMMENT_STYLE && TryConsume('/')) {
+ // Starting a comment?
+ if (TryConsume('/')) {
+ ConsumeLineComment();
+ } else if (TryConsume('*')) {
+ ConsumeBlockComment();
+ } else {
+ // Oops, it was just a slash. Return it.
+ current_.type = TYPE_SYMBOL;
+ current_.text = "/";
+ current_.line = line_;
+ current_.column = column_ - 1;
+ return true;
+ }
+
+ } else if (comment_style_ == SH_COMMENT_STYLE && TryConsume('#')) {
+ ConsumeLineComment();
+
+ } else if (LookingAt<Unprintable>() || current_char_ == '\0') {
+ AddError("Invalid control characters encountered in text.");
+ NextChar();
+ // Skip more unprintable characters, too. But, remember that '\0' is
+ // also what current_char_ is set to after EOF / read error. We have
+ // to be careful not to go into an infinite loop of trying to consume
+ // it, so make sure to check read_error_ explicitly before consuming
+ // '\0'.
+ while (TryConsumeOne<Unprintable>() ||
+ (!read_error_ && TryConsume('\0'))) {
+ // Ignore.
+ }
+
+ } else {
+ // Reading some sort of token.
+ StartToken();
+
+ if (TryConsumeOne<Letter>()) {
+ ConsumeZeroOrMore<Alphanumeric>();
+ current_.type = TYPE_IDENTIFIER;
+ } else if (TryConsume('0')) {
+ current_.type = ConsumeNumber(true, false);
+ } else if (TryConsume('.')) {
+ // This could be the beginning of a floating-point number, or it could
+ // just be a '.' symbol.
+
+ if (TryConsumeOne<Digit>()) {
+ // It's a floating-point number.
+ if (last_token_type == TYPE_IDENTIFIER && !skipped_stuff) {
+ // We don't accept syntax like "blah.123".
+ error_collector_->AddError(line_, column_ - 2,
+ "Need space between identifier and decimal point.");
+ }
+ current_.type = ConsumeNumber(false, true);
+ } else {
+ current_.type = TYPE_SYMBOL;
+ }
+ } else if (TryConsumeOne<Digit>()) {
+ current_.type = ConsumeNumber(false, false);
+ } else if (TryConsume('\"')) {
+ ConsumeString('\"');
+ current_.type = TYPE_STRING;
+ } else if (TryConsume('\'')) {
+ ConsumeString('\'');
+ current_.type = TYPE_STRING;
+ } else {
+ NextChar();
+ current_.type = TYPE_SYMBOL;
+ }
+
+ EndToken();
+ return true;
+ }
+
+ skipped_stuff = true;
+ }
+
+ // EOF
+ current_.type = TYPE_END;
+ current_.text.clear();
+ current_.line = line_;
+ current_.column = column_;
+ return false;
+}
+
+// -------------------------------------------------------------------
+// Token-parsing helpers. Remember that these don't need to report
+// errors since any errors should already have been reported while
+// tokenizing. Also, these can assume that whatever text they
+// are given is text that the tokenizer actually parsed as a token
+// of the given type.
+
+bool Tokenizer::ParseInteger(const string& text, uint64 max_value,
+ uint64* output) {
+ // Sadly, we can't just use strtoul() since it is only 32-bit and strtoull()
+ // is non-standard. I hate the C standard library. :(
+
+// return strtoull(text.c_str(), NULL, 0);
+
+ const char* ptr = text.c_str();
+ int base = 10;
+ if (ptr[0] == '0') {
+ if (ptr[1] == 'x') {
+ // This is hex.
+ base = 16;
+ ptr += 2;
+ } else {
+ // This is octal.
+ base = 8;
+ }
+ }
+
+ uint64 result = 0;
+ for (; *ptr != '\0'; ptr++) {
+ int digit = DigitValue(*ptr);
+ GOOGLE_LOG_IF(DFATAL, digit < 0 || digit >= base)
+ << " Tokenizer::ParseInteger() passed text that could not have been"
+ " tokenized as an integer: " << CEscape(text);
+ if (digit > max_value || result > (max_value - digit) / base) {
+ // Overflow.
+ return false;
+ }
+ result = result * base + digit;
+ }
+
+ *output = result;
+ return true;
+}
+
+double Tokenizer::ParseFloat(const string& text) {
+ const char* start = text.c_str();
+ char* end;
+ double result = NoLocaleStrtod(start, &end);
+
+ // "1e" is not a valid float, but if the tokenizer reads it, it will
+ // report an error but still return it as a valid token. We need to
+ // accept anything the tokenizer could possibly return, error or not.
+ if (*end == 'e' || *end == 'E') {
+ ++end;
+ if (*end == '-' || *end == '+') ++end;
+ }
+
+ // If the Tokenizer had allow_f_after_float_ enabled, the float may be
+ // suffixed with the letter 'f'.
+ if (*end == 'f' || *end == 'F') {
+ ++end;
+ }
+
+ GOOGLE_LOG_IF(DFATAL, end - start != text.size() || *start == '-')
+ << " Tokenizer::ParseFloat() passed text that could not have been"
+ " tokenized as a float: " << CEscape(text);
+ return result;
+}
+
+void Tokenizer::ParseString(const string& text, string* output) {
+ output->clear();
+
+ // Reminder: text[0] is always the quote character. (If text is
+ // empty, it's invalid, so we'll just return.)
+ if (text.empty()) {
+ GOOGLE_LOG(DFATAL)
+ << " ParseString::ParseString() passed text that could not have been"
+ " tokenized as a string: " << CEscape(text);
+ return;
+ }
+
+ output->reserve(text.size());
+
+ // Loop through the string copying characters to "output" and
+ // interpreting escape sequences. Note that any invalid escape
+ // sequences or other errors were already reported while tokenizing.
+ // In this case we do not need to produce valid results.
+ for (const char* ptr = text.c_str() + 1; *ptr != '\0'; ptr++) {
+ if (*ptr == '\\' && ptr[1] != '\0') {
+ // An escape sequence.
+ ++ptr;
+
+ if (OctalDigit::InClass(*ptr)) {
+ // An octal escape. May one, two, or three digits.
+ int code = DigitValue(*ptr);
+ if (OctalDigit::InClass(ptr[1])) {
+ ++ptr;
+ code = code * 8 + DigitValue(*ptr);
+ }
+ if (OctalDigit::InClass(ptr[1])) {
+ ++ptr;
+ code = code * 8 + DigitValue(*ptr);
+ }
+ output->push_back(static_cast<char>(code));
+
+ } else if (*ptr == 'x') {
+ // A hex escape. May zero, one, or two digits. (The zero case
+ // will have been caught as an error earlier.)
+ int code = 0;
+ if (HexDigit::InClass(ptr[1])) {
+ ++ptr;
+ code = DigitValue(*ptr);
+ }
+ if (HexDigit::InClass(ptr[1])) {
+ ++ptr;
+ code = code * 16 + DigitValue(*ptr);
+ }
+ output->push_back(static_cast<char>(code));
+
+ } else {
+ // Some other escape code.
+ output->push_back(TranslateEscape(*ptr));
+ }
+
+ } else if (*ptr == text[0]) {
+ // Ignore quote matching the starting quote.
+ } else {
+ output->push_back(*ptr);
+ }
+ }
+
+ return;
+}
+
+} // namespace io
+} // namespace protobuf
+} // namespace google
diff --git a/src/google/protobuf/io/tokenizer.h b/src/google/protobuf/io/tokenizer.h
new file mode 100644
index 00000000..841564ce
--- /dev/null
+++ b/src/google/protobuf/io/tokenizer.h
@@ -0,0 +1,276 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.
+// http://code.google.com/p/protobuf/
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+//
+// Class for parsing tokenized text from a ZeroCopyInputStream.
+
+#ifndef GOOGLE_PROTOBUF_IO_TOKENIZER_H__
+#define GOOGLE_PROTOBUF_IO_TOKENIZER_H__
+
+#include <string>
+#include <google/protobuf/stubs/common.h>
+
+namespace google {
+namespace protobuf {
+namespace io {
+
+class ZeroCopyInputStream; // zero_copy_stream.h
+
+// Defined in this file.
+class ErrorCollector;
+class Tokenizer;
+
+// Abstract interface for an object which collects the errors that occur
+// during parsing. A typical implementation might simply print the errors
+// to stdout.
+class LIBPROTOBUF_EXPORT ErrorCollector {
+ public:
+ inline ErrorCollector() {}
+ virtual ~ErrorCollector();
+
+ // Indicates that there was an error in the input at the given line and
+ // column numbers. The numbers are zero-based, so you may want to add
+ // 1 to each before printing them.
+ virtual void AddError(int line, int column, const string& message) = 0;
+
+ private:
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ErrorCollector);
+};
+
+// This class converts a stream of raw text into a stream of tokens for
+// the protocol definition parser to parse. The tokens recognized are
+// similar to those that make up the C language; see the TokenType enum for
+// precise descriptions. Whitespace and comments are skipped. By default,
+// C- and C++-style comments are recognized, but other styles can be used by
+// calling set_comment_style().
+class LIBPROTOBUF_EXPORT Tokenizer {
+ public:
+ // Construct a Tokenizer that reads and tokenizes text from the given
+ // input stream and writes errors to the given error_collector.
+ // The caller keeps ownership of input and error_collector.
+ Tokenizer(ZeroCopyInputStream* input, ErrorCollector* error_collector);
+ ~Tokenizer();
+
+ enum TokenType {
+ TYPE_START, // Next() has not yet been called.
+ TYPE_END, // End of input reached. "text" is empty.
+
+ TYPE_IDENTIFIER, // A sequence of letters, digits, and underscores, not
+ // starting with a digit. It is an error for a number
+ // to be followed by an identifier with no space in
+ // between.
+ TYPE_INTEGER, // A sequence of digits representing an integer. Normally
+ // the digits are decimal, but a prefix of "0x" indicates
+ // a hex number and a leading zero indicates octal, just
+ // like with C numeric literals. A leading negative sign
+ // is NOT included in the token; it's up to the parser to
+ // interpret the unary minus operator on its own.
+ TYPE_FLOAT, // A floating point literal, with a fractional part and/or
+ // an exponent. Always in decimal. Again, never
+ // negative.
+ TYPE_STRING, // A quoted sequence of escaped characters. Either single
+ // or double quotes can be used, but they must match.
+ // A string literal cannot cross a line break.
+ TYPE_SYMBOL, // Any other printable character, like '!' or '+'.
+ // Symbols are always a single character, so "!+$%" is
+ // four tokens.
+ };
+
+ // Structure representing a token read from the token stream.
+ struct Token {
+ TokenType type;
+ string text; // The exact text of the token as it appeared in
+ // the input. e.g. tokens of TYPE_STRING will still
+ // be escaped and in quotes.
+
+ // "line" and "column" specify the position of the first character of
+ // the token within the input stream. They are zero-based.
+ int line;
+ int column;
+ };
+
+ // Get the current token. This is updated when Next() is called. Before
+ // the first call to Next(), current() has type TYPE_START and no contents.
+ const Token& current();
+
+ // Advance to the next token. Returns false if the end of the input is
+ // reached.
+ bool Next();
+
+ // Parse helpers ---------------------------------------------------
+
+ // Parses a TYPE_FLOAT token. This never fails, so long as the text actually
+ // comes from a TYPE_FLOAT token parsed by Tokenizer. If it doesn't, the
+ // result is undefined (possibly an assert failure).
+ static double ParseFloat(const string& text);
+
+ // Parses a TYPE_STRING token. This never fails, so long as the text actually
+ // comes from a TYPE_STRING token parsed by Tokenizer. If it doesn't, the
+ // result is undefined (possibly an assert failure).
+ static void ParseString(const string& text, string* output);
+
+ // Parses a TYPE_INTEGER token. Returns false if the result would be
+ // greater than max_value. Otherwise, returns true and sets *output to the
+ // result. If the text is not from a Token of type TYPE_INTEGER originally
+ // parsed by a Tokenizer, the result is undefined (possibly an assert
+ // failure).
+ static bool ParseInteger(const string& text, uint64 max_value,
+ uint64* output);
+
+ // Options ---------------------------------------------------------
+
+ // Set true to allow floats to be suffixed with the letter 'f'. Tokens
+ // which would otherwise be integers but which have the 'f' suffix will be
+ // forced to be interpreted as floats. For all other purposes, the 'f' is
+ // ignored.
+ void set_allow_f_after_float(bool value) { allow_f_after_float_ = value; }
+
+ // Valid values for set_comment_style().
+ enum CommentStyle {
+ // Line comments begin with "//", block comments are delimited by "/*" and
+ // "*/".
+ CPP_COMMENT_STYLE,
+ // Line comments begin with "#". No way to write block comments.
+ SH_COMMENT_STYLE
+ };
+
+ // Sets the comment style.
+ void set_comment_style(CommentStyle style) { comment_style_ = style; }
+
+ // -----------------------------------------------------------------
+ private:
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Tokenizer);
+
+ Token current_; // Returned by current().
+
+ ZeroCopyInputStream* input_;
+ ErrorCollector* error_collector_;
+
+ char current_char_; // == buffer_[buffer_pos_], updated by NextChar().
+ const char* buffer_; // Current buffer returned from input_.
+ int buffer_size_; // Size of buffer_.
+ int buffer_pos_; // Current position within the buffer.
+ bool read_error_; // Did we previously encounter a read error?
+
+ // Line and column number of current_char_ within the whole input stream.
+ int line_;
+ int column_;
+
+ // Position in buffer_ where StartToken() was called. If the token
+ // started in the previous buffer, this is zero, and current_.text already
+ // contains the part of the token from the previous buffer. If not
+ // currently parsing a token, this is -1.
+ int token_start_;
+
+ // Options.
+ bool allow_f_after_float_;
+ CommentStyle comment_style_;
+
+ // Since we count columns we need to interpret tabs somehow. We'll take
+ // the standard 8-character definition for lack of any way to do better.
+ static const int kTabWidth = 8;
+
+ // -----------------------------------------------------------------
+ // Helper methods.
+
+ // Consume this character and advance to the next one.
+ void NextChar();
+
+ // Read a new buffer from the input.
+ void Refresh();
+
+ // Called when the current character is the first character of a new
+ // token (not including whitespace or comments).
+ inline void StartToken();
+ // Called when the current character is the first character after the
+ // end of the last token. After this returns, current_.text will
+ // contain all text consumed since StartToken() was called.
+ inline void EndToken();
+
+ // Convenience method to add an error at the current line and column.
+ void AddError(const string& message) {
+ error_collector_->AddError(line_, column_, message);
+ }
+
+ // -----------------------------------------------------------------
+ // The following four methods are used to consume tokens of specific
+ // types. They are actually used to consume all characters *after*
+ // the first, since the calling function consumes the first character
+ // in order to decide what kind of token is being read.
+
+ // Read and consume a string, ending when the given delimiter is
+ // consumed.
+ void ConsumeString(char delimiter);
+
+ // Read and consume a number, returning TYPE_FLOAT or TYPE_INTEGER
+ // depending on what was read. This needs to know if the first
+ // character was a zero in order to correctly recognize hex and octal
+ // numbers.
+ // It also needs to know if the first characted was a . to parse floating
+ // point correctly.
+ TokenType ConsumeNumber(bool started_with_zero, bool started_with_dot);
+
+ // Consume the rest of a line.
+ void ConsumeLineComment();
+ // Consume until "*/".
+ void ConsumeBlockComment();
+
+ // -----------------------------------------------------------------
+ // These helper methods make the parsing code more readable. The
+ // "character classes" refered to are defined at the top of the .cc file.
+ // Basically it is a C++ class with one method:
+ // static bool InClass(char c);
+ // The method returns true if c is a member of this "class", like "Letter"
+ // or "Digit".
+
+ // Returns true if the current character is of the given character
+ // class, but does not consume anything.
+ template<typename CharacterClass>
+ inline bool LookingAt();
+
+ // If the current character is in the given class, consume it and return
+ // true. Otherwise return false.
+ // e.g. TryConsumeOne<Letter>()
+ template<typename CharacterClass>
+ inline bool TryConsumeOne();
+
+ // Like above, but try to consume the specific character indicated.
+ inline bool TryConsume(char c);
+
+ // Consume zero or more of the given character class.
+ template<typename CharacterClass>
+ inline void ConsumeZeroOrMore();
+
+ // Consume one or more of the given character class or log the given
+ // error message.
+ // e.g. ConsumeOneOrMore<Digit>("Expected digits.");
+ template<typename CharacterClass>
+ inline void ConsumeOneOrMore(const char* error);
+};
+
+// inline methods ====================================================
+inline const Tokenizer::Token& Tokenizer::current() {
+ return current_;
+}
+
+} // namespace io
+} // namespace protobuf
+
+} // namespace google
+#endif // GOOGLE_PROTOBUF_IO_TOKENIZER_H__
diff --git a/src/google/protobuf/io/tokenizer_unittest.cc b/src/google/protobuf/io/tokenizer_unittest.cc
new file mode 100644
index 00000000..e2cede7e
--- /dev/null
+++ b/src/google/protobuf/io/tokenizer_unittest.cc
@@ -0,0 +1,706 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.
+// http://code.google.com/p/protobuf/
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+
+#include <vector>
+#include <math.h>
+#include <limits.h>
+
+#include <google/protobuf/io/tokenizer.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+
+#include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/strutil.h>
+#include <google/protobuf/stubs/substitute.h>
+#include <google/protobuf/testing/googletest.h>
+#include <gtest/gtest.h>
+
+namespace google {
+namespace protobuf {
+namespace io {
+namespace {
+
+// ===================================================================
+// Data-Driven Test Infrastructure
+
+// TODO(kenton): This is copied from coded_stream_unittest. This is
+// temporary until these fetaures are integrated into gTest itself.
+
+// TEST_1D and TEST_2D are macros I'd eventually like to see added to
+// gTest. These macros can be used to declare tests which should be
+// run multiple times, once for each item in some input array. TEST_1D
+// tests all cases in a single input array. TEST_2D tests all
+// combinations of cases from two arrays. The arrays must be statically
+// defined such that the GOOGLE_ARRAYSIZE() macro works on them. Example:
+//
+// int kCases[] = {1, 2, 3, 4}
+// TEST_1D(MyFixture, MyTest, kCases) {
+// EXPECT_GT(kCases_case, 0);
+// }
+//
+// This test iterates through the numbers 1, 2, 3, and 4 and tests that
+// they are all grater than zero. In case of failure, the exact case
+// which failed will be printed. The case type must be printable using
+// ostream::operator<<.
+
+#define TEST_1D(FIXTURE, NAME, CASES) \
+ class FIXTURE##_##NAME##_DD : public FIXTURE { \
+ protected: \
+ template <typename CaseType> \
+ void DoSingleCase(const CaseType& CASES##_case); \
+ }; \
+ \
+ TEST_F(FIXTURE##_##NAME##_DD, NAME) { \
+ for (int i = 0; i < GOOGLE_ARRAYSIZE(CASES); i++) { \
+ SCOPED_TRACE(testing::Message() \
+ << #CASES " case #" << i << ": " << CASES[i]); \
+ DoSingleCase(CASES[i]); \
+ } \
+ } \
+ \
+ template <typename CaseType> \
+ void FIXTURE##_##NAME##_DD::DoSingleCase(const CaseType& CASES##_case)
+
+#define TEST_2D(FIXTURE, NAME, CASES1, CASES2) \
+ class FIXTURE##_##NAME##_DD : public FIXTURE { \
+ protected: \
+ template <typename CaseType1, typename CaseType2> \
+ void DoSingleCase(const CaseType1& CASES1##_case, \
+ const CaseType2& CASES2##_case); \
+ }; \
+ \
+ TEST_F(FIXTURE##_##NAME##_DD, NAME) { \
+ for (int i = 0; i < GOOGLE_ARRAYSIZE(CASES1); i++) { \
+ for (int j = 0; j < GOOGLE_ARRAYSIZE(CASES2); j++) { \
+ SCOPED_TRACE(testing::Message() \
+ << #CASES1 " case #" << i << ": " << CASES1[i] << ", " \
+ << #CASES2 " case #" << j << ": " << CASES2[j]); \
+ DoSingleCase(CASES1[i], CASES2[j]); \
+ } \
+ } \
+ } \
+ \
+ template <typename CaseType1, typename CaseType2> \
+ void FIXTURE##_##NAME##_DD::DoSingleCase(const CaseType1& CASES1##_case, \
+ const CaseType2& CASES2##_case)
+
+// -------------------------------------------------------------------
+
+// An input stream that is basically like an ArrayInputStream but sometimes
+// returns empty buffers, just to throw us off.
+class TestInputStream : public ZeroCopyInputStream {
+ public:
+ TestInputStream(const void* data, int size, int block_size)
+ : array_stream_(data, size, block_size), counter_(0) {}
+ ~TestInputStream() {}
+
+ // implements ZeroCopyInputStream ----------------------------------
+ bool Next(const void** data, int* size) {
+ // We'll return empty buffers starting with the first buffer, and every
+ // 3 and 5 buffers after that.
+ if (counter_ % 3 == 0 || counter_ % 5 == 0) {
+ *data = NULL;
+ *size = 0;
+ ++counter_;
+ return true;
+ } else {
+ ++counter_;
+ return array_stream_.Next(data, size);
+ }
+ }
+
+ void BackUp(int count) { return array_stream_.BackUp(count); }
+ bool Skip(int count) { return array_stream_.Skip(count); }
+ int64 ByteCount() const { return array_stream_.ByteCount(); }
+
+ private:
+ ArrayInputStream array_stream_;
+ int counter_;
+};
+
+// -------------------------------------------------------------------
+
+// An error collector which simply concatenates all its errors into a big
+// block of text which can be checked.
+class TestErrorCollector : public ErrorCollector {
+ public:
+ TestErrorCollector() {}
+ ~TestErrorCollector() {}
+
+ string text_;
+
+ // implements ErrorCollector ---------------------------------------
+ void AddError(int line, int column, const string& message) {
+ strings::SubstituteAndAppend(&text_, "$0:$1: $2\n",
+ line, column, message);
+ }
+};
+
+// -------------------------------------------------------------------
+
+// We test each operation over a variety of block sizes to insure that
+// we test cases where reads cross buffer boundaries as well as cases
+// where they don't. This is sort of a brute-force approach to this,
+// but it's easy to write and easy to understand.
+const int kBlockSizes[] = {1, 2, 3, 5, 7, 13, 32, 1024};
+
+class TokenizerTest : public testing::Test {
+ protected:
+ // For easy testing.
+ uint64 ParseInteger(const string& text) {
+ uint64 result;
+ EXPECT_TRUE(Tokenizer::ParseInteger(text, kuint64max, &result));
+ return result;
+ }
+};
+
+// ===================================================================
+
+// These tests causes gcc 3.3.5 (and earlier?) to give the cryptic error:
+// "sorry, unimplemented: `method_call_expr' not supported by dump_expr"
+#if !defined(__GNUC__) || __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 3)
+
+// In each test case, the entire input text should parse as a single token
+// of the given type.
+struct SimpleTokenCase {
+ string input;
+ Tokenizer::TokenType type;
+};
+
+inline ostream& operator<<(ostream& out,
+ const SimpleTokenCase& test_case) {
+ return out << CEscape(test_case.input);
+}
+
+SimpleTokenCase kSimpleTokenCases[] = {
+ // Test identifiers.
+ { "hello", Tokenizer::TYPE_IDENTIFIER },
+
+ // Test integers.
+ { "123", Tokenizer::TYPE_INTEGER },
+ { "0xab6", Tokenizer::TYPE_INTEGER },
+ { "0XAB6", Tokenizer::TYPE_INTEGER },
+ { "0X1234567", Tokenizer::TYPE_INTEGER },
+ { "0x89abcdef", Tokenizer::TYPE_INTEGER },
+ { "0x89ABCDEF", Tokenizer::TYPE_INTEGER },
+ { "01234567", Tokenizer::TYPE_INTEGER },
+
+ // Test floats.
+ { "123.45", Tokenizer::TYPE_FLOAT },
+ { "1.", Tokenizer::TYPE_FLOAT },
+ { "1e3", Tokenizer::TYPE_FLOAT },
+ { "1E3", Tokenizer::TYPE_FLOAT },
+ { "1e-3", Tokenizer::TYPE_FLOAT },
+ { "1e+3", Tokenizer::TYPE_FLOAT },
+ { "1.e3", Tokenizer::TYPE_FLOAT },
+ { "1.2e3", Tokenizer::TYPE_FLOAT },
+ { ".1", Tokenizer::TYPE_FLOAT },
+ { ".1e3", Tokenizer::TYPE_FLOAT },
+ { ".1e-3", Tokenizer::TYPE_FLOAT },
+ { ".1e+3", Tokenizer::TYPE_FLOAT },
+
+ // Test strings.
+ { "'hello'", Tokenizer::TYPE_STRING },
+ { "\"foo\"", Tokenizer::TYPE_STRING },
+ { "'a\"b'", Tokenizer::TYPE_STRING },
+ { "\"a'b\"", Tokenizer::TYPE_STRING },
+ { "'a\\'b'", Tokenizer::TYPE_STRING },
+ { "\"a\\\"b\"", Tokenizer::TYPE_STRING },
+ { "'\\xf'", Tokenizer::TYPE_STRING },
+ { "'\\0'", Tokenizer::TYPE_STRING },
+
+ // Test symbols.
+ { "+", Tokenizer::TYPE_SYMBOL },
+ { ".", Tokenizer::TYPE_SYMBOL },
+};
+
+TEST_2D(TokenizerTest, SimpleTokens, kSimpleTokenCases, kBlockSizes) {
+ // Set up the tokenizer.
+ TestInputStream input(kSimpleTokenCases_case.input.data(),
+ kSimpleTokenCases_case.input.size(),
+ kBlockSizes_case);
+ TestErrorCollector error_collector;
+ Tokenizer tokenizer(&input, &error_collector);
+
+ // Before Next() is called, the initial token should always be TYPE_START.
+ EXPECT_EQ(Tokenizer::TYPE_START, tokenizer.current().type);
+ EXPECT_EQ("", tokenizer.current().text);
+ EXPECT_EQ(0, tokenizer.current().line);
+ EXPECT_EQ(0, tokenizer.current().column);
+
+ // Parse the token.
+ ASSERT_TRUE(tokenizer.Next());
+
+ // Check that it has the right type.
+ EXPECT_EQ(kSimpleTokenCases_case.type, tokenizer.current().type);
+ // Check that it contains the complete input text.
+ EXPECT_EQ(kSimpleTokenCases_case.input, tokenizer.current().text);
+ // Check that it is located at the beginning of the input
+ EXPECT_EQ(0, tokenizer.current().line);
+ EXPECT_EQ(0, tokenizer.current().column);
+
+ // There should be no more input.
+ EXPECT_FALSE(tokenizer.Next());
+
+ // After Next() returns false, the token should have type TYPE_END.
+ EXPECT_EQ(Tokenizer::TYPE_END, tokenizer.current().type);
+ EXPECT_EQ("", tokenizer.current().text);
+ EXPECT_EQ(0, tokenizer.current().line);
+ EXPECT_EQ(kSimpleTokenCases_case.input.size(), tokenizer.current().column);
+
+ // There should be no errors.
+ EXPECT_TRUE(error_collector.text_.empty());
+}
+
+TEST_1D(TokenizerTest, FloatSuffix, kBlockSizes) {
+ // Test the "allow_f_after_float" option.
+
+ // Set up the tokenizer.
+ const char* text = "1f 2.5f 6e3f 7F";
+ TestInputStream input(text, strlen(text), kBlockSizes_case);
+ TestErrorCollector error_collector;
+ Tokenizer tokenizer(&input, &error_collector);
+ tokenizer.set_allow_f_after_float(true);
+
+ // Advance through tokens and check that they are parsed as expected.
+ ASSERT_TRUE(tokenizer.Next());
+ EXPECT_EQ(tokenizer.current().text, "1f");
+ EXPECT_EQ(tokenizer.current().type, Tokenizer::TYPE_FLOAT);
+ ASSERT_TRUE(tokenizer.Next());
+ EXPECT_EQ(tokenizer.current().text, "2.5f");
+ EXPECT_EQ(tokenizer.current().type, Tokenizer::TYPE_FLOAT);
+ ASSERT_TRUE(tokenizer.Next());
+ EXPECT_EQ(tokenizer.current().text, "6e3f");
+ EXPECT_EQ(tokenizer.current().type, Tokenizer::TYPE_FLOAT);
+ ASSERT_TRUE(tokenizer.Next());
+ EXPECT_EQ(tokenizer.current().text, "7F");
+ EXPECT_EQ(tokenizer.current().type, Tokenizer::TYPE_FLOAT);
+
+ // There should be no more input.
+ EXPECT_FALSE(tokenizer.Next());
+ // There should be no errors.
+ EXPECT_TRUE(error_collector.text_.empty());
+}
+
+#endif
+
+// -------------------------------------------------------------------
+
+// In each case, the input is parsed to produce a list of tokens. The
+// last token in "output" must have type TYPE_END.
+struct MultiTokenCase {
+ string input;
+ Tokenizer::Token output[10]; // The compiler wants a constant array
+ // size for initialization to work. There
+ // is no reason this can't be increased if
+ // needed.
+};
+
+inline ostream& operator<<(ostream& out,
+ const MultiTokenCase& test_case) {
+ return out << CEscape(test_case.input);
+}
+
+MultiTokenCase kMultiTokenCases[] = {
+ // Test empty input.
+ { "", {
+ { Tokenizer::TYPE_END , "" , 0, 0 },
+ }},
+
+ // Test all token types at the same time.
+ { "foo 1 1.2 + 'bar'", {
+ { Tokenizer::TYPE_IDENTIFIER, "foo" , 0, 0 },
+ { Tokenizer::TYPE_INTEGER , "1" , 0, 4 },
+ { Tokenizer::TYPE_FLOAT , "1.2" , 0, 6 },
+ { Tokenizer::TYPE_SYMBOL , "+" , 0, 10 },
+ { Tokenizer::TYPE_STRING , "'bar'", 0, 12 },
+ { Tokenizer::TYPE_END , "" , 0, 17 },
+ }},
+
+ // Test that consecutive symbols are parsed as separate tokens.
+ { "!@+%", {
+ { Tokenizer::TYPE_SYMBOL , "!" , 0, 0 },
+ { Tokenizer::TYPE_SYMBOL , "@" , 0, 1 },
+ { Tokenizer::TYPE_SYMBOL , "+" , 0, 2 },
+ { Tokenizer::TYPE_SYMBOL , "%" , 0, 3 },
+ { Tokenizer::TYPE_END , "" , 0, 4 },
+ }},
+
+ // Test that newlines affect line numbers correctly.
+ { "foo bar\nrab oof", {
+ { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0 },
+ { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 4 },
+ { Tokenizer::TYPE_IDENTIFIER, "rab", 1, 0 },
+ { Tokenizer::TYPE_IDENTIFIER, "oof", 1, 4 },
+ { Tokenizer::TYPE_END , "" , 1, 7 },
+ }},
+
+ // Test that tabs affect column numbers correctly.
+ { "foo\tbar \tbaz", {
+ { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0 },
+ { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 8 },
+ { Tokenizer::TYPE_IDENTIFIER, "baz", 0, 16 },
+ { Tokenizer::TYPE_END , "" , 0, 19 },
+ }},
+
+ // Test that line comments are ignored.
+ { "foo // This is a comment\n"
+ "bar // This is another comment", {
+ { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0 },
+ { Tokenizer::TYPE_IDENTIFIER, "bar", 1, 0 },
+ { Tokenizer::TYPE_END , "" , 1, 30 },
+ }},
+
+ // Test that block comments are ignored.
+ { "foo /* This is a block comment */ bar", {
+ { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0 },
+ { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 34 },
+ { Tokenizer::TYPE_END , "" , 0, 37 },
+ }},
+
+ // Test that sh-style comments are not ignored by default.
+ { "foo # bar\n"
+ "baz", {
+ { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0 },
+ { Tokenizer::TYPE_SYMBOL , "#" , 0, 4 },
+ { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 6 },
+ { Tokenizer::TYPE_IDENTIFIER, "baz", 1, 0 },
+ { Tokenizer::TYPE_END , "" , 1, 3 },
+ }},
+};
+
+TEST_2D(TokenizerTest, MultipleTokens, kMultiTokenCases, kBlockSizes) {
+ // Set up the tokenizer.
+ TestInputStream input(kMultiTokenCases_case.input.data(),
+ kMultiTokenCases_case.input.size(),
+ kBlockSizes_case);
+ TestErrorCollector error_collector;
+ Tokenizer tokenizer(&input, &error_collector);
+
+ // Before Next() is called, the initial token should always be TYPE_START.
+ EXPECT_EQ(Tokenizer::TYPE_START, tokenizer.current().type);
+ EXPECT_EQ("", tokenizer.current().text);
+ EXPECT_EQ(0, tokenizer.current().line);
+ EXPECT_EQ(0, tokenizer.current().column);
+
+ // Loop through all expected tokens.
+ int i = 0;
+ Tokenizer::Token token;
+ do {
+ token = kMultiTokenCases_case.output[i++];
+
+ SCOPED_TRACE(testing::Message() << "Token #" << i << ": " << token.text);
+
+ // Next() should only return false when it hits the end token.
+ if (token.type != Tokenizer::TYPE_END) {
+ ASSERT_TRUE(tokenizer.Next());
+ } else {
+ ASSERT_FALSE(tokenizer.Next());
+ }
+
+ // Check that the token matches the expected one.
+ EXPECT_EQ(token.type, tokenizer.current().type);
+ EXPECT_EQ(token.text, tokenizer.current().text);
+ EXPECT_EQ(token.line, tokenizer.current().line);
+ EXPECT_EQ(token.column, tokenizer.current().column);
+
+ } while (token.type != Tokenizer::TYPE_END);
+
+ // There should be no errors.
+ EXPECT_TRUE(error_collector.text_.empty());
+}
+
+// This test causes gcc 3.3.5 (and earlier?) to give the cryptic error:
+// "sorry, unimplemented: `method_call_expr' not supported by dump_expr"
+#if !defined(__GNUC__) || __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 3)
+
+TEST_1D(TokenizerTest, ShCommentStyle, kBlockSizes) {
+ // Test the "comment_style" option.
+
+ const char* text = "foo # bar\n"
+ "baz // qux\n"
+ "corge /* grault */\n"
+ "garply";
+ const char* const kTokens[] = {"foo", // "# bar" is ignored
+ "baz", "/", "/", "qux",
+ "corge", "/", "*", "grault", "*", "/",
+ "garply"};
+
+ // Set up the tokenizer.
+ TestInputStream input(text, strlen(text), kBlockSizes_case);
+ TestErrorCollector error_collector;
+ Tokenizer tokenizer(&input, &error_collector);
+ tokenizer.set_comment_style(Tokenizer::SH_COMMENT_STYLE);
+
+ // Advance through tokens and check that they are parsed as expected.
+ for (int i = 0; i < GOOGLE_ARRAYSIZE(kTokens); i++) {
+ EXPECT_TRUE(tokenizer.Next());
+ EXPECT_EQ(tokenizer.current().text, kTokens[i]);
+ }
+
+ // There should be no more input.
+ EXPECT_FALSE(tokenizer.Next());
+ // There should be no errors.
+ EXPECT_TRUE(error_collector.text_.empty());
+}
+
+#endif
+
+// -------------------------------------------------------------------
+
+// Test parse helpers. It's not really worth setting up a full data-driven
+// test here.
+TEST_F(TokenizerTest, ParseInteger) {
+ EXPECT_EQ(0, ParseInteger("0"));
+ EXPECT_EQ(123, ParseInteger("123"));
+ EXPECT_EQ(0xabcdef12u, ParseInteger("0xabcdef12"));
+ EXPECT_EQ(0xabcdef12u, ParseInteger("0xABCDEF12"));
+ EXPECT_EQ(kuint64max, ParseInteger("0xFFFFFFFFFFFFFFFF"));
+ EXPECT_EQ(01234567, ParseInteger("01234567"));
+
+ // Test invalid integers that may still be tokenized as integers.
+ EXPECT_EQ(0, ParseInteger("0x"));
+
+#ifdef GTEST_HAS_DEATH_TEST // death tests do not work on Windows yet
+ // Test invalid integers that will never be tokenized as integers.
+ EXPECT_DEBUG_DEATH(ParseInteger("zxy"),
+ "passed text that could not have been tokenized as an integer");
+ EXPECT_DEBUG_DEATH(ParseInteger("1.2"),
+ "passed text that could not have been tokenized as an integer");
+ EXPECT_DEBUG_DEATH(ParseInteger("08"),
+ "passed text that could not have been tokenized as an integer");
+ EXPECT_DEBUG_DEATH(ParseInteger("0xg"),
+ "passed text that could not have been tokenized as an integer");
+ EXPECT_DEBUG_DEATH(ParseInteger("-1"),
+ "passed text that could not have been tokenized as an integer");
+#endif // GTEST_HAS_DEATH_TEST
+
+ // Test overflows.
+ uint64 i;
+ EXPECT_TRUE (Tokenizer::ParseInteger("0", 0, &i));
+ EXPECT_FALSE(Tokenizer::ParseInteger("1", 0, &i));
+ EXPECT_TRUE (Tokenizer::ParseInteger("1", 1, &i));
+ EXPECT_TRUE (Tokenizer::ParseInteger("12345", 12345, &i));
+ EXPECT_FALSE(Tokenizer::ParseInteger("12346", 12345, &i));
+ EXPECT_TRUE (Tokenizer::ParseInteger("0xFFFFFFFFFFFFFFFF" , kuint64max, &i));
+ EXPECT_FALSE(Tokenizer::ParseInteger("0x10000000000000000", kuint64max, &i));
+}
+
+TEST_F(TokenizerTest, ParseFloat) {
+ EXPECT_DOUBLE_EQ(1 , Tokenizer::ParseFloat("1."));
+ EXPECT_DOUBLE_EQ(1e3 , Tokenizer::ParseFloat("1e3"));
+ EXPECT_DOUBLE_EQ(1e3 , Tokenizer::ParseFloat("1E3"));
+ EXPECT_DOUBLE_EQ(1.5e3, Tokenizer::ParseFloat("1.5e3"));
+ EXPECT_DOUBLE_EQ(.1 , Tokenizer::ParseFloat(".1"));
+ EXPECT_DOUBLE_EQ(.25 , Tokenizer::ParseFloat(".25"));
+ EXPECT_DOUBLE_EQ(.1e3 , Tokenizer::ParseFloat(".1e3"));
+ EXPECT_DOUBLE_EQ(.25e3, Tokenizer::ParseFloat(".25e3"));
+ EXPECT_DOUBLE_EQ(.1e+3, Tokenizer::ParseFloat(".1e+3"));
+ EXPECT_DOUBLE_EQ(.1e-3, Tokenizer::ParseFloat(".1e-3"));
+ EXPECT_DOUBLE_EQ(5 , Tokenizer::ParseFloat("5"));
+ EXPECT_DOUBLE_EQ(6e-12, Tokenizer::ParseFloat("6e-12"));
+ EXPECT_DOUBLE_EQ(1.2 , Tokenizer::ParseFloat("1.2"));
+ EXPECT_DOUBLE_EQ(1.e2 , Tokenizer::ParseFloat("1.e2"));
+
+ // Test invalid integers that may still be tokenized as integers.
+ EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1e"));
+ EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1e-"));
+ EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1.e"));
+
+ // Test 'f' suffix.
+ EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1f"));
+ EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1.0f"));
+ EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1F"));
+
+ // These should parse successfully even though they are out of range.
+ // Overflows become infinity and underflows become zero.
+ EXPECT_EQ( 0.0, Tokenizer::ParseFloat("1e-9999999999999999999999999999"));
+ EXPECT_EQ(HUGE_VAL, Tokenizer::ParseFloat("1e+9999999999999999999999999999"));
+
+#ifdef GTEST_HAS_DEATH_TEST // death tests do not work on Windows yet
+ // Test invalid integers that will never be tokenized as integers.
+ EXPECT_DEBUG_DEATH(Tokenizer::ParseFloat("zxy"),
+ "passed text that could not have been tokenized as a float");
+ EXPECT_DEBUG_DEATH(Tokenizer::ParseFloat("1-e0"),
+ "passed text that could not have been tokenized as a float");
+ EXPECT_DEBUG_DEATH(Tokenizer::ParseFloat("-1.0"),
+ "passed text that could not have been tokenized as a float");
+#endif // GTEST_HAS_DEATH_TEST
+}
+
+TEST_F(TokenizerTest, ParseString) {
+ string output;
+ Tokenizer::ParseString("'hello'", &output);
+ EXPECT_EQ("hello", output);
+ Tokenizer::ParseString("\"blah\\nblah2\"", &output);
+ EXPECT_EQ("blah\nblah2", output);
+ Tokenizer::ParseString("'\\1x\\1\\123\\739\\52\\334n\\3'", &output);
+ EXPECT_EQ("\1x\1\123\739\52\334n\3", output);
+ Tokenizer::ParseString("'\\x20\\x4'", &output);
+ EXPECT_EQ("\x20\x4", output);
+
+ // Test invalid strings that may still be tokenized as strings.
+ Tokenizer::ParseString("\"\\a\\l\\v\\t", &output); // \l is invalid
+ EXPECT_EQ("\a?\v\t", output);
+ Tokenizer::ParseString("'", &output);
+ EXPECT_EQ("", output);
+ Tokenizer::ParseString("'\\", &output);
+ EXPECT_EQ("\\", output);
+
+ // Test invalid strings that will never be tokenized as strings.
+#ifdef GTEST_HAS_DEATH_TEST // death tests do not work on Windows yet
+ EXPECT_DEBUG_DEATH(Tokenizer::ParseString("", &output),
+ "passed text that could not have been tokenized as a string");
+#endif // GTEST_HAS_DEATH_TEST
+}
+
+// -------------------------------------------------------------------
+
+// Each case parses some input text, ignoring the tokens produced, and
+// checks that the error output matches what is expected.
+struct ErrorCase {
+ string input;
+ bool recoverable; // True if the tokenizer should be able to recover and
+ // parse more tokens after seeing this error. Cases
+ // for which this is true must end with "foo" as
+ // the last token, which the test will check for.
+ const char* errors;
+};
+
+inline ostream& operator<<(ostream& out,
+ const ErrorCase& test_case) {
+ return out << CEscape(test_case.input);
+}
+
+ErrorCase kErrorCases[] = {
+ // String errors.
+ { "'\\l' foo", true,
+ "0:2: Invalid escape sequence in string literal.\n" },
+ { "'\\x' foo", true,
+ "0:3: Expected hex digits for escape sequence.\n" },
+ { "'foo", false,
+ "0:4: String literals cannot cross line boundaries.\n" },
+ { "'bar\nfoo", true,
+ "0:4: String literals cannot cross line boundaries.\n" },
+
+ // Integer errors.
+ { "123foo", true,
+ "0:3: Need space between number and identifier.\n" },
+
+ // Hex/octal errors.
+ { "0x foo", true,
+ "0:2: \"0x\" must be followed by hex digits.\n" },
+ { "0541823 foo", true,
+ "0:4: Numbers starting with leading zero must be in octal.\n" },
+ { "0x123z foo", true,
+ "0:5: Need space between number and identifier.\n" },
+ { "0x123.4 foo", true,
+ "0:5: Hex and octal numbers must be integers.\n" },
+ { "0123.4 foo", true,
+ "0:4: Hex and octal numbers must be integers.\n" },
+
+ // Float errors.
+ { "1e foo", true,
+ "0:2: \"e\" must be followed by exponent.\n" },
+ { "1e- foo", true,
+ "0:3: \"e\" must be followed by exponent.\n" },
+ { "1.2.3 foo", true,
+ "0:3: Already saw decimal point or exponent; can't have another one.\n" },
+ { "1e2.3 foo", true,
+ "0:3: Already saw decimal point or exponent; can't have another one.\n" },
+ { "a.1 foo", true,
+ "0:1: Need space between identifier and decimal point.\n" },
+ // allow_f_after_float not enabled, so this should be an error.
+ { "1.0f foo", true,
+ "0:3: Need space between number and identifier.\n" },
+
+ // Block comment errors.
+ { "/*", false,
+ "0:2: End-of-file inside block comment.\n"
+ "0:0: Comment started here.\n"},
+ { "/*/*/ foo", true,
+ "0:3: \"/*\" inside block comment. Block comments cannot be nested.\n"},
+
+ // Control characters. Multiple consecutive control characters should only
+ // produce one error.
+ { "\b foo", true,
+ "0:0: Invalid control characters encountered in text.\n" },
+ { "\b\b foo", true,
+ "0:0: Invalid control characters encountered in text.\n" },
+
+ // Check that control characters at end of input don't result in an
+ // infinite loop.
+ { "\b", false,
+ "0:0: Invalid control characters encountered in text.\n" },
+
+ // Check recovery from '\0'. We have to explicitly specify the length of
+ // these strings because otherwise the string constructor will just call
+ // strlen() which will see the first '\0' and think that is the end of the
+ // string.
+ { string("\0foo", 4), true,
+ "0:0: Invalid control characters encountered in text.\n" },
+ { string("\0\0foo", 5), true,
+ "0:0: Invalid control characters encountered in text.\n" },
+};
+
+TEST_2D(TokenizerTest, Errors, kErrorCases, kBlockSizes) {
+ // Set up the tokenizer.
+ TestInputStream input(kErrorCases_case.input.data(),
+ kErrorCases_case.input.size(),
+ kBlockSizes_case);
+ TestErrorCollector error_collector;
+ Tokenizer tokenizer(&input, &error_collector);
+
+ // Ignore all input, except remember if the last token was "foo".
+ bool last_was_foo = false;
+ while (tokenizer.Next()) {
+ last_was_foo = tokenizer.current().text == "foo";
+ }
+
+ // Check that the errors match what was expected.
+ EXPECT_EQ(error_collector.text_, kErrorCases_case.errors);
+
+ // If the error was recoverable, make sure we saw "foo" after it.
+ if (kErrorCases_case.recoverable) {
+ EXPECT_TRUE(last_was_foo);
+ }
+}
+
+// -------------------------------------------------------------------
+
+TEST_1D(TokenizerTest, BackUpOnDestruction, kBlockSizes) {
+ string text = "foo bar";
+ TestInputStream input(text.data(), text.size(), kBlockSizes_case);
+
+ // Create a tokenizer, read one token, then destroy it.
+ {
+ TestErrorCollector error_collector;
+ Tokenizer tokenizer(&input, &error_collector);
+
+ tokenizer.Next();
+ }
+
+ // Only "foo" should have been read.
+ EXPECT_EQ(strlen("foo"), input.ByteCount());
+}
+
+} // namespace
+} // namespace io
+} // namespace protobuf
+} // namespace google
diff --git a/src/google/protobuf/io/zero_copy_stream.cc b/src/google/protobuf/io/zero_copy_stream.cc
new file mode 100644
index 00000000..80559c4a
--- /dev/null
+++ b/src/google/protobuf/io/zero_copy_stream.cc
@@ -0,0 +1,34 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.
+// http://code.google.com/p/protobuf/
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+
+#include <google/protobuf/io/zero_copy_stream.h>
+
+
+namespace google {
+namespace protobuf {
+namespace io {
+
+ZeroCopyInputStream::~ZeroCopyInputStream() {}
+ZeroCopyOutputStream::~ZeroCopyOutputStream() {}
+
+
+} // namespace io
+} // namespace protobuf
+} // namespace google
diff --git a/src/google/protobuf/io/zero_copy_stream.h b/src/google/protobuf/io/zero_copy_stream.h
new file mode 100644
index 00000000..bce5f2d3
--- /dev/null
+++ b/src/google/protobuf/io/zero_copy_stream.h
@@ -0,0 +1,224 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.
+// http://code.google.com/p/protobuf/
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+//
+// This file contains the ZeroCopyInputStream and ZeroCopyOutputStream
+// interfaces, which represent abstract I/O streams to and from which
+// protocol buffers can be read and written. For a few simple
+// implementations of these interfaces, see zero_copy_stream_impl.h.
+//
+// These interfaces are different from classic I/O streams in that they
+// try to minimize the amount of data copying that needs to be done.
+// To accomplish this, responsibility for allocating buffers is moved to
+// the stream object, rather than being the responsibility of the caller.
+// So, the stream can return a buffer which actually points directly into
+// the final data structure where the bytes are to be stored, and the caller
+// can interact directly with that buffer, eliminating an intermediate copy
+// operation.
+//
+// As an example, consider the common case in which you are reading bytes
+// from an array that is already in memory (or perhaps an mmap()ed file).
+// With classic I/O streams, you would do something like:
+// char buffer[BUFFER_SIZE];
+// input->Read(buffer, BUFFER_SIZE);
+// DoSomething(buffer, BUFFER_SIZE);
+// Then, the stream basically just calls memcpy() to copy the data from
+// the array into your buffer. With a ZeroCopyInputStream, you would do
+// this instead:
+// const void* buffer;
+// int size;
+// input->Next(&buffer, &size);
+// DoSomething(buffer, size);
+// Here, no copy is performed. The input stream returns a pointer directly
+// into the backing array, and the caller ends up reading directly from it.
+//
+// If you want to be able to read the old-fashion way, you can create
+// a CodedInputStream or CodedOutputStream wrapping these objects and use
+// their ReadRaw()/WriteRaw() methods. These will, of course, add a copy
+// step, but Coded*Stream will handle buffering so at least it will be
+// reasonably efficient.
+//
+// ZeroCopyInputStream example:
+// // Read in a file and print its contents to stdout.
+// int fd = open("myfile", O_RDONLY);
+// ZeroCopyInputStream* input = new FileInputStream(fd);
+//
+// const void* buffer;
+// int size;
+// while (input->Next(&buffer, &size)) {
+// cout.write(buffer, size);
+// }
+//
+// delete input;
+// close(fd);
+//
+// ZeroCopyOutputStream example:
+// // Copy the contents of "infile" to "outfile", using plain read() for
+// // "infile" but a ZeroCopyOutputStream for "outfile".
+// int infd = open("infile", O_RDONLY);
+// int outfd = open("outfile", O_WRONLY);
+// ZeroCopyInputStream* output = new FileOutputStream(outfd);
+//
+// void* buffer;
+// int size;
+// while (output->Next(&buffer, &size)) {
+// int bytes = read(infd, buffer, size);
+// if (bytes < size) {
+// // Reached EOF.
+// output->BackUp(size - bytes);
+// break;
+// }
+// }
+//
+// delete output;
+// close(infd);
+// close(outfd);
+
+#ifndef GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_H__
+#define GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_H__
+
+#include <string>
+#include <google/protobuf/stubs/common.h>
+
+namespace google {
+
+namespace protobuf {
+namespace io {
+
+// Defined in this file.
+class ZeroCopyInputStream;
+class ZeroCopyOutputStream;
+
+// Abstract interface similar to an input stream but designed to minimize
+// copying.
+class LIBPROTOBUF_EXPORT ZeroCopyInputStream {
+ public:
+ inline ZeroCopyInputStream() {}
+ virtual ~ZeroCopyInputStream();
+
+ // Obtains a chunk of data from the stream.
+ //
+ // Preconditions:
+ // * "size" and "data" are not NULL.
+ //
+ // Postconditions:
+ // * If the returned value is false, there is no more data to return or
+ // an error occurred. All errors are permanent.
+ // * Otherwise, "size" points to the actual number of bytes read and "data"
+ // points to a pointer to a buffer containing these bytes.
+ // * Ownership of this buffer remains with the stream, and the buffer
+ // remains valid only until some other method of the stream is called
+ // or the stream is destroyed.
+ // * It is legal for the returned buffer to have zero size, as long
+ // as repeatedly calling Next() eventually yields a buffer with non-zero
+ // size.
+ virtual bool Next(const void** data, int* size) = 0;
+
+ // Backs up a number of bytes, so that the next call to Next() returns
+ // data again that was already returned by the last call to Next(). This
+ // is useful when writing procedures that are only supposed to read up
+ // to a certain point in the input, then return. If Next() returns a
+ // buffer that goes beyond what you wanted to read, you can use BackUp()
+ // to return to the point where you intended to finish.
+ //
+ // Preconditions:
+ // * The last method called must have been Next().
+ // * count must be less than or equal to the size of the last buffer
+ // returned by Next().
+ //
+ // Postconditions:
+ // * The last "count" bytes of the last buffer returned by Next() will be
+ // pushed back into the stream. Subsequent calls to Next() will return
+ // the same data again before producing new data.
+ virtual void BackUp(int count) = 0;
+
+ // Skips a number of bytes. Returns false if the end of the stream is
+ // reached or some input error occurred. In the end-of-stream case, the
+ // stream is advanced to the end of the stream (so ByteCount() will return
+ // the total size of the stream).
+ virtual bool Skip(int count) = 0;
+
+ // Returns the total number of bytes read since this object was created.
+ virtual int64 ByteCount() const = 0;
+
+
+ private:
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ZeroCopyInputStream);
+};
+
+// Abstract interface similar to an output stream but designed to minimize
+// copying.
+class LIBPROTOBUF_EXPORT ZeroCopyOutputStream {
+ public:
+ inline ZeroCopyOutputStream() {}
+ virtual ~ZeroCopyOutputStream();
+
+ // Obtains a buffer into which data can be written. Any data written
+ // into this buffer will eventually (maybe instantly, maybe later on)
+ // be written to the output.
+ //
+ // Preconditions:
+ // * "size" and "data" are not NULL.
+ //
+ // Postconditions:
+ // * If the returned value is false, an error occurred. All errors are
+ // permanent.
+ // * Otherwise, "size" points to the actual number of bytes in the buffer
+ // and "data" points to the buffer.
+ // * Ownership of this buffer remains with the stream, and the buffer
+ // remains valid only until some other method of the stream is called
+ // or the stream is destroyed.
+ // * Any data which the caller stores in this buffer will eventually be
+ // written to the output (unless BackUp() is called).
+ // * It is legal for the returned buffer to have zero size, as long
+ // as repeatedly calling Next() eventually yields a buffer with non-zero
+ // size.
+ virtual bool Next(void** data, int* size) = 0;
+
+ // Backs up a number of bytes, so that the end of the last buffer returned
+ // by Next() is not actually written. This is needed when you finish
+ // writing all the data you want to write, but the last buffer was bigger
+ // than you needed. You don't want to write a bunch of garbage after the
+ // end of your data, so you use BackUp() to back up.
+ //
+ // Preconditions:
+ // * The last method called must have been Next().
+ // * count must be less than or equal to the size of the last buffer
+ // returned by Next().
+ // * The caller must not have written anything to the last "count" bytes
+ // of that buffer.
+ //
+ // Postconditions:
+ // * The last "count" bytes of the last buffer returned by Next() will be
+ // ignored.
+ virtual void BackUp(int count) = 0;
+
+ // Returns the total number of bytes written since this object was created.
+ virtual int64 ByteCount() const = 0;
+
+
+ private:
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ZeroCopyOutputStream);
+};
+
+} // namespace io
+} // namespace protobuf
+
+} // namespace google
+#endif // GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_H__
diff --git a/src/google/protobuf/io/zero_copy_stream_impl.cc b/src/google/protobuf/io/zero_copy_stream_impl.cc
new file mode 100644
index 00000000..7ff84460
--- /dev/null
+++ b/src/google/protobuf/io/zero_copy_stream_impl.cc
@@ -0,0 +1,793 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.
+// http://code.google.com/p/protobuf/
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+
+#ifdef _MSC_VER
+#include <io.h>
+#else
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#endif
+#include <errno.h>
+#include <iostream>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/stl_util-inl.h>
+
+namespace google {
+namespace protobuf {
+namespace io {
+
+#ifdef _WIN32
+// Win32 lseek is broken: If invoked on a non-seekable file descriptor, its
+// return value is undefined. We re-define it to always produce an error.
+#define lseek(fd, offset, origin) ((off_t)-1)
+#endif
+
+namespace {
+
+
+// EINTR sucks.
+int close_no_eintr(int fd) {
+ int result;
+ do {
+ result = close(fd);
+ } while (result < 0 && errno == EINTR);
+ return result;
+}
+
+// Default block size for Copying{In,Out}putStreamAdaptor.
+static const int kDefaultBlockSize = 8192;
+
+} // namespace
+
+// ===================================================================
+
+ArrayInputStream::ArrayInputStream(const void* data, int size,
+ int block_size)
+ : data_(reinterpret_cast<const uint8*>(data)),
+ size_(size),
+ block_size_(block_size > 0 ? block_size : size),
+ position_(0),
+ last_returned_size_(0) {
+}
+
+ArrayInputStream::~ArrayInputStream() {
+}
+
+bool ArrayInputStream::Next(const void** data, int* size) {
+ if (position_ < size_) {
+ last_returned_size_ = min(block_size_, size_ - position_);
+ *data = data_ + position_;
+ *size = last_returned_size_;
+ position_ += last_returned_size_;
+ return true;
+ } else {
+ // We're at the end of the array.
+ last_returned_size_ = 0; // Don't let caller back up.
+ return false;
+ }
+}
+
+void ArrayInputStream::BackUp(int count) {
+ GOOGLE_CHECK_GT(last_returned_size_, 0)
+ << "BackUp() can only be called after a successful Next().";
+ GOOGLE_CHECK_LE(count, last_returned_size_);
+ GOOGLE_CHECK_GE(count, 0);
+ position_ -= count;
+ last_returned_size_ = 0; // Don't let caller back up further.
+}
+
+bool ArrayInputStream::Skip(int count) {
+ GOOGLE_CHECK_GE(count, 0);
+ last_returned_size_ = 0; // Don't let caller back up.
+ if (count > size_ - position_) {
+ position_ = size_;
+ return false;
+ } else {
+ position_ += count;
+ return true;
+ }
+}
+
+int64 ArrayInputStream::ByteCount() const {
+ return position_;
+}
+
+
+// ===================================================================
+
+ArrayOutputStream::ArrayOutputStream(void* data, int size, int block_size)
+ : data_(reinterpret_cast<uint8*>(data)),
+ size_(size),
+ block_size_(block_size > 0 ? block_size : size),
+ position_(0),
+ last_returned_size_(0) {
+}
+
+ArrayOutputStream::~ArrayOutputStream() {
+}
+
+bool ArrayOutputStream::Next(void** data, int* size) {
+ if (position_ < size_) {
+ last_returned_size_ = min(block_size_, size_ - position_);
+ *data = data_ + position_;
+ *size = last_returned_size_;
+ position_ += last_returned_size_;
+ return true;
+ } else {
+ // We're at the end of the array.
+ last_returned_size_ = 0; // Don't let caller back up.
+ return false;
+ }
+}
+
+void ArrayOutputStream::BackUp(int count) {
+ GOOGLE_CHECK_GT(last_returned_size_, 0)
+ << "BackUp() can only be called after a successful Next().";
+ GOOGLE_CHECK_LE(count, last_returned_size_);
+ GOOGLE_CHECK_GE(count, 0);
+ position_ -= count;
+ last_returned_size_ = 0; // Don't let caller back up further.
+}
+
+int64 ArrayOutputStream::ByteCount() const {
+ return position_;
+}
+
+// ===================================================================
+
+StringOutputStream::StringOutputStream(string* target)
+ : target_(target) {
+}
+
+StringOutputStream::~StringOutputStream() {
+}
+
+bool StringOutputStream::Next(void** data, int* size) {
+ int old_size = target_->size();
+
+ // Grow the string.
+ if (old_size < target_->capacity()) {
+ // Resize the string to match its capacity, since we can get away
+ // without a memory allocation this way.
+ target_->resize(target_->capacity());
+ } else {
+ // Size has reached capacity, so double the size. Also make sure
+ // that the new size is at least kMinimumSize.
+ target_->resize(
+ max(old_size * 2,
+ kMinimumSize + 0)); // "+ 0" works around GCC4 weirdness.
+ }
+
+ *data = string_as_array(target_) + old_size;
+ *size = target_->size() - old_size;
+ return true;
+}
+
+void StringOutputStream::BackUp(int count) {
+ GOOGLE_CHECK_GE(count, 0);
+ GOOGLE_CHECK_LE(count, target_->size());
+ target_->resize(target_->size() - count);
+}
+
+int64 StringOutputStream::ByteCount() const {
+ return target_->size();
+}
+
+// ===================================================================
+
+
+// ===================================================================
+
+CopyingInputStream::~CopyingInputStream() {}
+
+int CopyingInputStream::Skip(int count) {
+ char junk[4096];
+ int skipped = 0;
+ while (skipped < count) {
+ int bytes = Read(junk, min(count, implicit_cast<int>(sizeof(junk))));
+ if (bytes <= 0) {
+ // EOF or read error.
+ return skipped;
+ }
+ skipped += bytes;
+ }
+ return skipped;
+}
+
+CopyingInputStreamAdaptor::CopyingInputStreamAdaptor(
+ CopyingInputStream* copying_stream, int block_size)
+ : copying_stream_(copying_stream),
+ owns_copying_stream_(false),
+ failed_(false),
+ position_(0),
+ buffer_size_(block_size > 0 ? block_size : kDefaultBlockSize),
+ buffer_used_(0),
+ backup_bytes_(0) {
+}
+
+CopyingInputStreamAdaptor::~CopyingInputStreamAdaptor() {
+ if (owns_copying_stream_) {
+ delete copying_stream_;
+ }
+}
+
+bool CopyingInputStreamAdaptor::Next(const void** data, int* size) {
+ if (failed_) {
+ // Already failed on a previous read.
+ return false;
+ }
+
+ AllocateBufferIfNeeded();
+
+ if (backup_bytes_ > 0) {
+ // We have data left over from a previous BackUp(), so just return that.
+ *data = buffer_.get() + buffer_used_ - backup_bytes_;
+ *size = backup_bytes_;
+ backup_bytes_ = 0;
+ return true;
+ }
+
+ // Read new data into the buffer.
+ buffer_used_ = copying_stream_->Read(buffer_.get(), buffer_size_);
+ if (buffer_used_ <= 0) {
+ // EOF or read error. We don't need the buffer anymore.
+ if (buffer_used_ < 0) {
+ // Read error (not EOF).
+ failed_ = true;
+ }
+ FreeBuffer();
+ return false;
+ }
+ position_ += buffer_used_;
+
+ *size = buffer_used_;
+ *data = buffer_.get();
+ return true;
+}
+
+void CopyingInputStreamAdaptor::BackUp(int count) {
+ GOOGLE_CHECK(backup_bytes_ == 0 && buffer_.get() != NULL)
+ << " BackUp() can only be called after Next().";
+ GOOGLE_CHECK_LE(count, buffer_used_)
+ << " Can't back up over more bytes than were returned by the last call"
+ " to Next().";
+ GOOGLE_CHECK_GE(count, 0)
+ << " Parameter to BackUp() can't be negative.";
+
+ backup_bytes_ = count;
+}
+
+bool CopyingInputStreamAdaptor::Skip(int count) {
+ GOOGLE_CHECK_GE(count, 0);
+
+ if (failed_) {
+ // Already failed on a previous read.
+ return false;
+ }
+
+ // First skip any bytes left over from a previous BackUp().
+ if (backup_bytes_ >= count) {
+ // We have more data left over than we're trying to skip. Just chop it.
+ backup_bytes_ -= count;
+ return true;
+ }
+
+ count -= backup_bytes_;
+ backup_bytes_ = 0;
+
+ int skipped = copying_stream_->Skip(count);
+ position_ += skipped;
+ return skipped == count;
+}
+
+int64 CopyingInputStreamAdaptor::ByteCount() const {
+ return position_ - backup_bytes_;
+}
+
+void CopyingInputStreamAdaptor::AllocateBufferIfNeeded() {
+ if (buffer_.get() == NULL) {
+ buffer_.reset(new uint8[buffer_size_]);
+ }
+}
+
+void CopyingInputStreamAdaptor::FreeBuffer() {
+ GOOGLE_CHECK_EQ(backup_bytes_, 0);
+ buffer_used_ = 0;
+ buffer_.reset();
+}
+
+// ===================================================================
+
+CopyingOutputStream::~CopyingOutputStream() {}
+
+CopyingOutputStreamAdaptor::CopyingOutputStreamAdaptor(
+ CopyingOutputStream* copying_stream, int block_size)
+ : copying_stream_(copying_stream),
+ owns_copying_stream_(false),
+ failed_(false),
+ position_(0),
+ buffer_size_(block_size > 0 ? block_size : kDefaultBlockSize),
+ buffer_used_(0) {
+}
+
+CopyingOutputStreamAdaptor::~CopyingOutputStreamAdaptor() {
+ WriteBuffer();
+ if (owns_copying_stream_) {
+ delete copying_stream_;
+ }
+}
+
+bool CopyingOutputStreamAdaptor::Flush() {
+ return WriteBuffer();
+}
+
+bool CopyingOutputStreamAdaptor::Next(void** data, int* size) {
+ if (buffer_used_ == buffer_size_) {
+ if (!WriteBuffer()) return false;
+ }
+
+ AllocateBufferIfNeeded();
+
+ *data = buffer_.get() + buffer_used_;
+ *size = buffer_size_ - buffer_used_;
+ buffer_used_ = buffer_size_;
+ return true;
+}
+
+void CopyingOutputStreamAdaptor::BackUp(int count) {
+ GOOGLE_CHECK_GE(count, 0);
+ GOOGLE_CHECK_EQ(buffer_used_, buffer_size_)
+ << " BackUp() can only be called after Next().";
+ GOOGLE_CHECK_LE(count, buffer_used_)
+ << " Can't back up over more bytes than were returned by the last call"
+ " to Next().";
+
+ buffer_used_ -= count;
+}
+
+int64 CopyingOutputStreamAdaptor::ByteCount() const {
+ return position_ + buffer_used_;
+}
+
+bool CopyingOutputStreamAdaptor::WriteBuffer() {
+ if (failed_) {
+ // Already failed on a previous write.
+ return false;
+ }
+
+ if (buffer_used_ == 0) return true;
+
+ if (copying_stream_->Write(buffer_.get(), buffer_used_)) {
+ position_ += buffer_used_;
+ buffer_used_ = 0;
+ return true;
+ } else {
+ failed_ = true;
+ FreeBuffer();
+ return false;
+ }
+}
+
+void CopyingOutputStreamAdaptor::AllocateBufferIfNeeded() {
+ if (buffer_ == NULL) {
+ buffer_.reset(new uint8[buffer_size_]);
+ }
+}
+
+void CopyingOutputStreamAdaptor::FreeBuffer() {
+ buffer_used_ = 0;
+ buffer_.reset();
+}
+
+// ===================================================================
+
+FileInputStream::FileInputStream(int file_descriptor, int block_size)
+ : copying_input_(file_descriptor),
+ impl_(&copying_input_, block_size) {
+}
+
+FileInputStream::~FileInputStream() {}
+
+bool FileInputStream::Close() {
+ return copying_input_.Close();
+}
+
+bool FileInputStream::Next(const void** data, int* size) {
+ return impl_.Next(data, size);
+}
+
+void FileInputStream::BackUp(int count) {
+ impl_.BackUp(count);
+}
+
+bool FileInputStream::Skip(int count) {
+ return impl_.Skip(count);
+}
+
+int64 FileInputStream::ByteCount() const {
+ return impl_.ByteCount();
+}
+
+FileInputStream::CopyingFileInputStream::CopyingFileInputStream(
+ int file_descriptor)
+ : file_(file_descriptor),
+ close_on_delete_(false),
+ is_closed_(false),
+ errno_(0),
+ previous_seek_failed_(false) {
+}
+
+FileInputStream::CopyingFileInputStream::~CopyingFileInputStream() {
+ if (close_on_delete_) {
+ if (!Close()) {
+ GOOGLE_LOG(ERROR) << "close() failed: " << strerror(errno_);
+ }
+ }
+}
+
+bool FileInputStream::CopyingFileInputStream::Close() {
+ GOOGLE_CHECK(!is_closed_);
+
+ is_closed_ = true;
+ if (close_no_eintr(file_) != 0) {
+ // The docs on close() do not specify whether a file descriptor is still
+ // open after close() fails with EIO. However, the glibc source code
+ // seems to indicate that it is not.
+ errno_ = errno;
+ return false;
+ }
+
+ return true;
+}
+
+int FileInputStream::CopyingFileInputStream::Read(void* buffer, int size) {
+ GOOGLE_CHECK(!is_closed_);
+
+ int result;
+ do {
+ result = read(file_, buffer, size);
+ } while (result < 0 && errno == EINTR);
+
+ if (result < 0) {
+ // Read error (not EOF).
+ errno_ = errno;
+ }
+
+ return result;
+}
+
+int FileInputStream::CopyingFileInputStream::Skip(int count) {
+ GOOGLE_CHECK(!is_closed_);
+
+ if (!previous_seek_failed_ &&
+ lseek(file_, count, SEEK_CUR) != (off_t)-1) {
+ // Seek succeeded.
+ return count;
+ } else {
+ // Failed to seek.
+
+ // Note to self: Don't seek again. This file descriptor doesn't
+ // support it.
+ previous_seek_failed_ = true;
+
+ // Use the default implementation.
+ return CopyingInputStream::Skip(count);
+ }
+}
+
+// ===================================================================
+
+FileOutputStream::FileOutputStream(int file_descriptor, int block_size)
+ : copying_output_(file_descriptor),
+ impl_(&copying_output_, block_size) {
+}
+
+FileOutputStream::~FileOutputStream() {
+ impl_.Flush();
+}
+
+bool FileOutputStream::Close() {
+ bool flush_succeeded = impl_.Flush();
+ return copying_output_.Close() && flush_succeeded;
+}
+
+bool FileOutputStream::Next(void** data, int* size) {
+ return impl_.Next(data, size);
+}
+
+void FileOutputStream::BackUp(int count) {
+ impl_.BackUp(count);
+}
+
+int64 FileOutputStream::ByteCount() const {
+ return impl_.ByteCount();
+}
+
+FileOutputStream::CopyingFileOutputStream::CopyingFileOutputStream(
+ int file_descriptor)
+ : file_(file_descriptor),
+ close_on_delete_(false),
+ is_closed_(false),
+ errno_(0) {
+}
+
+FileOutputStream::CopyingFileOutputStream::~CopyingFileOutputStream() {
+ if (close_on_delete_) {
+ if (!Close()) {
+ GOOGLE_LOG(ERROR) << "close() failed: " << strerror(errno_);
+ }
+ }
+}
+
+bool FileOutputStream::CopyingFileOutputStream::Close() {
+ GOOGLE_CHECK(!is_closed_);
+
+ is_closed_ = true;
+ if (close_no_eintr(file_) != 0) {
+ // The docs on close() do not specify whether a file descriptor is still
+ // open after close() fails with EIO. However, the glibc source code
+ // seems to indicate that it is not.
+ errno_ = errno;
+ return false;
+ }
+
+ return true;
+}
+
+bool FileOutputStream::CopyingFileOutputStream::Write(
+ const void* buffer, int size) {
+ GOOGLE_CHECK(!is_closed_);
+ int total_written = 0;
+
+ const uint8* buffer_base = reinterpret_cast<const uint8*>(buffer);
+
+ while (total_written < size) {
+ int bytes;
+ do {
+ bytes = write(file_, buffer_base + total_written, size - total_written);
+ } while (bytes < 0 && errno == EINTR);
+
+ if (bytes <= 0) {
+ // Write error.
+
+ // FIXME(kenton): According to the man page, if write() returns zero,
+ // there was no error; write() simply did not write anything. It's
+ // unclear under what circumstances this might happen, but presumably
+ // errno won't be set in this case. I am confused as to how such an
+ // event should be handled. For now I'm treating it as an error, since
+ // retrying seems like it could lead to an infinite loop. I suspect
+ // this never actually happens anyway.
+
+ if (bytes < 0) {
+ errno_ = errno;
+ }
+ return false;
+ }
+ total_written += bytes;
+ }
+
+ return true;
+}
+
+// ===================================================================
+
+IstreamInputStream::IstreamInputStream(istream* input, int block_size)
+ : copying_input_(input),
+ impl_(&copying_input_, block_size) {
+}
+
+IstreamInputStream::~IstreamInputStream() {}
+
+bool IstreamInputStream::Next(const void** data, int* size) {
+ return impl_.Next(data, size);
+}
+
+void IstreamInputStream::BackUp(int count) {
+ impl_.BackUp(count);
+}
+
+bool IstreamInputStream::Skip(int count) {
+ return impl_.Skip(count);
+}
+
+int64 IstreamInputStream::ByteCount() const {
+ return impl_.ByteCount();
+}
+
+IstreamInputStream::CopyingIstreamInputStream::CopyingIstreamInputStream(
+ istream* input)
+ : input_(input) {
+}
+
+IstreamInputStream::CopyingIstreamInputStream::~CopyingIstreamInputStream() {}
+
+int IstreamInputStream::CopyingIstreamInputStream::Read(
+ void* buffer, int size) {
+ input_->read(reinterpret_cast<char*>(buffer), size);
+ int result = input_->gcount();
+ if (result == 0 && input_->fail() && !input_->eof()) {
+ return -1;
+ }
+ return result;
+}
+
+// ===================================================================
+
+OstreamOutputStream::OstreamOutputStream(ostream* output, int block_size)
+ : copying_output_(output),
+ impl_(&copying_output_, block_size) {
+}
+
+OstreamOutputStream::~OstreamOutputStream() {
+ impl_.Flush();
+}
+
+bool OstreamOutputStream::Next(void** data, int* size) {
+ return impl_.Next(data, size);
+}
+
+void OstreamOutputStream::BackUp(int count) {
+ impl_.BackUp(count);
+}
+
+int64 OstreamOutputStream::ByteCount() const {
+ return impl_.ByteCount();
+}
+
+OstreamOutputStream::CopyingOstreamOutputStream::CopyingOstreamOutputStream(
+ ostream* output)
+ : output_(output) {
+}
+
+OstreamOutputStream::CopyingOstreamOutputStream::~CopyingOstreamOutputStream() {
+}
+
+bool OstreamOutputStream::CopyingOstreamOutputStream::Write(
+ const void* buffer, int size) {
+ output_->write(reinterpret_cast<const char*>(buffer), size);
+ return output_->good();
+}
+
+// ===================================================================
+
+ConcatenatingInputStream::ConcatenatingInputStream(
+ ZeroCopyInputStream* const streams[], int count)
+ : streams_(streams), stream_count_(count), bytes_retired_(0) {
+}
+
+ConcatenatingInputStream::~ConcatenatingInputStream() {
+}
+
+bool ConcatenatingInputStream::Next(const void** data, int* size) {
+ while (stream_count_ > 0) {
+ if (streams_[0]->Next(data, size)) return true;
+
+ // That stream is done. Advance to the next one.
+ bytes_retired_ += streams_[0]->ByteCount();
+ ++streams_;
+ --stream_count_;
+ }
+
+ // No more streams.
+ return false;
+}
+
+void ConcatenatingInputStream::BackUp(int count) {
+ if (stream_count_ > 0) {
+ streams_[0]->BackUp(count);
+ } else {
+ GOOGLE_LOG(DFATAL) << "Can't BackUp() after failed Next().";
+ }
+}
+
+bool ConcatenatingInputStream::Skip(int count) {
+ while (stream_count_ > 0) {
+ // Assume that ByteCount() can be used to find out how much we actually
+ // skipped when Skip() fails.
+ int64 target_byte_count = streams_[0]->ByteCount() + count;
+ if (streams_[0]->Skip(count)) return true;
+
+ // Hit the end of the stream. Figure out how many more bytes we still have
+ // to skip.
+ int64 final_byte_count = streams_[0]->ByteCount();
+ GOOGLE_DCHECK_LT(final_byte_count, target_byte_count);
+ count = target_byte_count - final_byte_count;
+
+ // That stream is done. Advance to the next one.
+ bytes_retired_ += final_byte_count;
+ ++streams_;
+ --stream_count_;
+ }
+
+ return false;
+}
+
+int64 ConcatenatingInputStream::ByteCount() const {
+ if (stream_count_ == 0) {
+ return bytes_retired_;
+ } else {
+ return bytes_retired_ + streams_[0]->ByteCount();
+ }
+}
+
+
+// ===================================================================
+
+LimitingInputStream::LimitingInputStream(ZeroCopyInputStream* input,
+ int64 limit)
+ : input_(input), limit_(limit) {}
+
+LimitingInputStream::~LimitingInputStream() {
+ // If we overshot the limit, back up.
+ if (limit_ < 0) input_->BackUp(-limit_);
+}
+
+bool LimitingInputStream::Next(const void** data, int* size) {
+ if (limit_ < 0) return false;
+ if (!input_->Next(data, size)) return false;
+
+ limit_ -= *size;
+ if (limit_ < 0) {
+ // We overshot the limit. Reduce *size to hide the rest of the buffer.
+ *size += limit_;
+ }
+ return true;
+}
+
+void LimitingInputStream::BackUp(int count) {
+ if (limit_ < 0) {
+ input_->BackUp(count - limit_);
+ limit_ = count;
+ } else {
+ input_->BackUp(count);
+ limit_ += count;
+ }
+}
+
+bool LimitingInputStream::Skip(int count) {
+ if (count > limit_) {
+ if (limit_ < 0) return false;
+ input_->Skip(limit_);
+ limit_ = 0;
+ return false;
+ } else {
+ if (!input_->Skip(count)) return false;
+ limit_ -= count;
+ return true;
+ }
+}
+
+int64 LimitingInputStream::ByteCount() const {
+ if (limit_ < 0) {
+ return input_->ByteCount() + limit_;
+ } else {
+ return input_->ByteCount();
+ }
+}
+
+
+// ===================================================================
+
+} // namespace io
+} // namespace protobuf
+} // namespace google
diff --git a/src/google/protobuf/io/zero_copy_stream_impl.h b/src/google/protobuf/io/zero_copy_stream_impl.h
new file mode 100644
index 00000000..bd73afb7
--- /dev/null
+++ b/src/google/protobuf/io/zero_copy_stream_impl.h
@@ -0,0 +1,617 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.
+// http://code.google.com/p/protobuf/
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+//
+// This file contains common implementations of the interfaces defined in
+// zero_copy_stream.h. These implementations cover I/O on raw arrays,
+// strings, and file descriptors. Of course, many users will probably
+// want to write their own implementations of these interfaces specific
+// to the particular I/O abstractions they prefer to use, but these
+// should cover the most common cases.
+
+#ifndef GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_IMPL_H__
+#define GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_IMPL_H__
+
+#include <string>
+#include <iosfwd>
+#include <google/protobuf/io/zero_copy_stream.h>
+#include <google/protobuf/stubs/common.h>
+
+
+namespace google {
+namespace protobuf {
+namespace io {
+
+// ===================================================================
+
+// A ZeroCopyInputStream backed by an in-memory array of bytes.
+class LIBPROTOBUF_EXPORT ArrayInputStream : public ZeroCopyInputStream {
+ public:
+ // Create an InputStream that returns the bytes pointed to by "data".
+ // "data" remains the property of the caller but must remain valid until
+ // the stream is destroyed. If a block_size is given, calls to Next()
+ // will return data blocks no larger than the given size. Otherwise, the
+ // first call to Next() returns the entire array. block_size is mainly
+ // useful for testing; in production you would probably never want to set
+ // it.
+ ArrayInputStream(const void* data, int size, int block_size = -1);
+ ~ArrayInputStream();
+
+ // implements ZeroCopyInputStream ----------------------------------
+ bool Next(const void** data, int* size);
+ void BackUp(int count);
+ bool Skip(int count);
+ int64 ByteCount() const;
+
+
+ private:
+ const uint8* const data_; // The byte array.
+ const int size_; // Total size of the array.
+ const int block_size_; // How many bytes to return at a time.
+
+ int position_;
+ int last_returned_size_; // How many bytes we returned last time Next()
+ // was called (used for error checking only).
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ArrayInputStream);
+};
+
+// ===================================================================
+
+// A ZeroCopyOutputStream backed by an in-memory array of bytes.
+class LIBPROTOBUF_EXPORT ArrayOutputStream : public ZeroCopyOutputStream {
+ public:
+ // Create an OutputStream that writes to the bytes pointed to by "data".
+ // "data" remains the property of the caller but must remain valid until
+ // the stream is destroyed. If a block_size is given, calls to Next()
+ // will return data blocks no larger than the given size. Otherwise, the
+ // first call to Next() returns the entire array. block_size is mainly
+ // useful for testing; in production you would probably never want to set
+ // it.
+ ArrayOutputStream(void* data, int size, int block_size = -1);
+ ~ArrayOutputStream();
+
+ // implements ZeroCopyOutputStream ---------------------------------
+ bool Next(void** data, int* size);
+ void BackUp(int count);
+ int64 ByteCount() const;
+
+ private:
+ uint8* const data_; // The byte array.
+ const int size_; // Total size of the array.
+ const int block_size_; // How many bytes to return at a time.
+
+ int position_;
+ int last_returned_size_; // How many bytes we returned last time Next()
+ // was called (used for error checking only).
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ArrayOutputStream);
+};
+
+// ===================================================================
+
+// A ZeroCopyOutputStream which appends bytes to a string.
+class LIBPROTOBUF_EXPORT StringOutputStream : public ZeroCopyOutputStream {
+ public:
+ // Create a StringOutputStream which appends bytes to the given string.
+ // The string remains property of the caller, but it MUST NOT be accessed
+ // in any way until the stream is destroyed.
+ //
+ // Hint: If you call target->reserve(n) before creating the stream,
+ // the first call to Next() will return at least n bytes of buffer
+ // space.
+ explicit StringOutputStream(string* target);
+ ~StringOutputStream();
+
+ // implements ZeroCopyOutputStream ---------------------------------
+ bool Next(void** data, int* size);
+ void BackUp(int count);
+ int64 ByteCount() const;
+
+ private:
+ static const int kMinimumSize = 16;
+
+ string* target_;
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(StringOutputStream);
+};
+
+// Note: There is no StringInputStream. Instead, just create an
+// ArrayInputStream as follows:
+// ArrayInputStream input(str.data(), str.size());
+
+// ===================================================================
+
+
+// ===================================================================
+
+// A generic traditional input stream interface.
+//
+// Lots of traditional input streams (e.g. file descriptors, C stdio
+// streams, and C++ iostreams) expose an interface where every read
+// involves copying bytes into a buffer. If you want to take such an
+// interface and make a ZeroCopyInputStream based on it, simply implement
+// CopyingInputStream and then use CopyingInputStreamAdaptor.
+//
+// CopyingInputStream implementations should avoid buffering if possible.
+// CopyingInputStreamAdaptor does its own buffering and will read data
+// in large blocks.
+class LIBPROTOBUF_EXPORT CopyingInputStream {
+ public:
+ virtual ~CopyingInputStream();
+
+ // Reads up to "size" bytes into the given buffer. Returns the number of
+ // bytes read. Read() waits until at least one byte is available, or
+ // returns zero if no bytes will ever become available (EOF), or -1 if a
+ // permanent read error occurred.
+ virtual int Read(void* buffer, int size) = 0;
+
+ // Skips the next "count" bytes of input. Returns the number of bytes
+ // actually skipped. This will always be exactly equal to "count" unless
+ // EOF was reached or a permanent read error occurred.
+ //
+ // The default implementation just repeatedly calls Read() into a scratch
+ // buffer.
+ virtual int Skip(int count);
+};
+
+// A ZeroCopyInputStream which reads from a CopyingInputStream. This is
+// useful for implementing ZeroCopyInputStreams that read from traditional
+// streams. Note that this class is not really zero-copy.
+//
+// If you want to read from file descriptors or C++ istreams, this is
+// already implemented for you: use FileInputStream or IstreamInputStream
+// respectively.
+class LIBPROTOBUF_EXPORT CopyingInputStreamAdaptor : public ZeroCopyInputStream {
+ public:
+ // Creates a stream that reads from the given CopyingInputStream.
+ // If a block_size is given, it specifies the number of bytes that
+ // should be read and returned with each call to Next(). Otherwise,
+ // a reasonable default is used. The caller retains ownership of
+ // copying_stream unless SetOwnsCopyingStream(true) is called.
+ explicit CopyingInputStreamAdaptor(CopyingInputStream* copying_stream,
+ int block_size = -1);
+ ~CopyingInputStreamAdaptor();
+
+ // Call SetOwnsCopyingStream(true) to tell the CopyingInputStreamAdaptor to
+ // delete the underlying CopyingInputStream when it is destroyed.
+ void SetOwnsCopyingStream(bool value) { owns_copying_stream_ = value; }
+
+ // implements ZeroCopyInputStream ----------------------------------
+ bool Next(const void** data, int* size);
+ void BackUp(int count);
+ bool Skip(int count);
+ int64 ByteCount() const;
+
+ private:
+ // Insures that buffer_ is not NULL.
+ void AllocateBufferIfNeeded();
+ // Frees the buffer and resets buffer_used_.
+ void FreeBuffer();
+
+ // The underlying copying stream.
+ CopyingInputStream* copying_stream_;
+ bool owns_copying_stream_;
+
+ // True if we have seen a permenant error from the underlying stream.
+ bool failed_;
+
+ // The current position of copying_stream_, relative to the point where
+ // we started reading.
+ int64 position_;
+
+ // Data is read into this buffer. It may be NULL if no buffer is currently
+ // in use. Otherwise, it points to an array of size buffer_size_.
+ scoped_array<uint8> buffer_;
+ const int buffer_size_;
+
+ // Number of valid bytes currently in the buffer (i.e. the size last
+ // returned by Next()). 0 <= buffer_used_ <= buffer_size_.
+ int buffer_used_;
+
+ // Number of bytes in the buffer which were backed up over by a call to
+ // BackUp(). These need to be returned again.
+ // 0 <= backup_bytes_ <= buffer_used_
+ int backup_bytes_;
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingInputStreamAdaptor);
+};
+
+// ===================================================================
+
+// A generic traditional output stream interface.
+//
+// Lots of traditional output streams (e.g. file descriptors, C stdio
+// streams, and C++ iostreams) expose an interface where every write
+// involves copying bytes from a buffer. If you want to take such an
+// interface and make a ZeroCopyOutputStream based on it, simply implement
+// CopyingOutputStream and then use CopyingOutputStreamAdaptor.
+//
+// CopyingOutputStream implementations should avoid buffering if possible.
+// CopyingOutputStreamAdaptor does its own buffering and will write data
+// in large blocks.
+class LIBPROTOBUF_EXPORT CopyingOutputStream {
+ public:
+ virtual ~CopyingOutputStream();
+
+ // Writes "size" bytes from the given buffer to the output. Returns true
+ // if successful, false on a write error.
+ virtual bool Write(const void* buffer, int size) = 0;
+};
+
+// A ZeroCopyOutputStream which writes to a CopyingOutputStream. This is
+// useful for implementing ZeroCopyOutputStreams that write to traditional
+// streams. Note that this class is not really zero-copy.
+//
+// If you want to write to file descriptors or C++ ostreams, this is
+// already implemented for you: use FileOutputStream or OstreamOutputStream
+// respectively.
+class LIBPROTOBUF_EXPORT CopyingOutputStreamAdaptor : public ZeroCopyOutputStream {
+ public:
+ // Creates a stream that writes to the given Unix file descriptor.
+ // If a block_size is given, it specifies the size of the buffers
+ // that should be returned by Next(). Otherwise, a reasonable default
+ // is used.
+ explicit CopyingOutputStreamAdaptor(CopyingOutputStream* copying_stream,
+ int block_size = -1);
+ ~CopyingOutputStreamAdaptor();
+
+ // Writes all pending data to the underlying stream. Returns false if a
+ // write error occurred on the underlying stream. (The underlying
+ // stream itself is not necessarily flushed.)
+ bool Flush();
+
+ // Call SetOwnsCopyingStream(true) to tell the CopyingOutputStreamAdaptor to
+ // delete the underlying CopyingOutputStream when it is destroyed.
+ void SetOwnsCopyingStream(bool value) { owns_copying_stream_ = value; }
+
+ // implements ZeroCopyOutputStream ---------------------------------
+ bool Next(void** data, int* size);
+ void BackUp(int count);
+ int64 ByteCount() const;
+
+ private:
+ // Write the current buffer, if it is present.
+ bool WriteBuffer();
+ // Insures that buffer_ is not NULL.
+ void AllocateBufferIfNeeded();
+ // Frees the buffer.
+ void FreeBuffer();
+
+ // The underlying copying stream.
+ CopyingOutputStream* copying_stream_;
+ bool owns_copying_stream_;
+
+ // True if we have seen a permenant error from the underlying stream.
+ bool failed_;
+
+ // The current position of copying_stream_, relative to the point where
+ // we started writing.
+ int64 position_;
+
+ // Data is written from this buffer. It may be NULL if no buffer is
+ // currently in use. Otherwise, it points to an array of size buffer_size_.
+ scoped_array<uint8> buffer_;
+ const int buffer_size_;
+
+ // Number of valid bytes currently in the buffer (i.e. the size last
+ // returned by Next()). When BackUp() is called, we just reduce this.
+ // 0 <= buffer_used_ <= buffer_size_.
+ int buffer_used_;
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingOutputStreamAdaptor);
+};
+
+// ===================================================================
+
+// A ZeroCopyInputStream which reads from a file descriptor.
+//
+// FileInputStream is preferred over using an ifstream with IstreamInputStream.
+// The latter will introduce an extra layer of buffering, harming performance.
+// Also, it's conceivable that FileInputStream could someday be enhanced
+// to use zero-copy file descriptors on OSs which support them.
+class LIBPROTOBUF_EXPORT FileInputStream : public ZeroCopyInputStream {
+ public:
+ // Creates a stream that reads from the given Unix file descriptor.
+ // If a block_size is given, it specifies the number of bytes that
+ // should be read and returned with each call to Next(). Otherwise,
+ // a reasonable default is used.
+ explicit FileInputStream(int file_descriptor, int block_size = -1);
+ ~FileInputStream();
+
+ // Flushes any buffers and closes the underlying file. Returns false if
+ // an error occurs during the process; use GetErrno() to examine the error.
+ // Even if an error occurs, the file descriptor is closed when this returns.
+ bool Close();
+
+ // By default, the file descriptor is not closed when the stream is
+ // destroyed. Call SetCloseOnDelete(true) to change that. WARNING:
+ // This leaves no way for the caller to detect if close() fails. If
+ // detecting close() errors is important to you, you should arrange
+ // to close the descriptor yourself.
+ void SetCloseOnDelete(bool value) { copying_input_.SetCloseOnDelete(value); }
+
+ // If an I/O error has occurred on this file descriptor, this is the
+ // errno from that error. Otherwise, this is zero. Once an error
+ // occurs, the stream is broken and all subsequent operations will
+ // fail.
+ int GetErrno() { return copying_input_.GetErrno(); }
+
+ // implements ZeroCopyInputStream ----------------------------------
+ bool Next(const void** data, int* size);
+ void BackUp(int count);
+ bool Skip(int count);
+ int64 ByteCount() const;
+
+ private:
+ class LIBPROTOBUF_EXPORT CopyingFileInputStream : public CopyingInputStream {
+ public:
+ CopyingFileInputStream(int file_descriptor);
+ ~CopyingFileInputStream();
+
+ bool Close();
+ void SetCloseOnDelete(bool value) { close_on_delete_ = value; }
+ int GetErrno() { return errno_; }
+
+ // implements CopyingInputStream ---------------------------------
+ int Read(void* buffer, int size);
+ int Skip(int count);
+
+ private:
+ // The file descriptor.
+ const int file_;
+ bool close_on_delete_;
+ bool is_closed_;
+
+ // The errno of the I/O error, if one has occurred. Otherwise, zero.
+ int errno_;
+
+ // Did we try to seek once and fail? If so, we assume this file descriptor
+ // doesn't support seeking and won't try again.
+ bool previous_seek_failed_;
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingFileInputStream);
+ };
+
+ CopyingFileInputStream copying_input_;
+ CopyingInputStreamAdaptor impl_;
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FileInputStream);
+};
+
+// ===================================================================
+
+// A ZeroCopyOutputStream which writes to a file descriptor.
+//
+// FileInputStream is preferred over using an ofstream with OstreamOutputStream.
+// The latter will introduce an extra layer of buffering, harming performance.
+// Also, it's conceivable that FileInputStream could someday be enhanced
+// to use zero-copy file descriptors on OSs which support them.
+class LIBPROTOBUF_EXPORT FileOutputStream : public ZeroCopyOutputStream {
+ public:
+ // Creates a stream that writes to the given Unix file descriptor.
+ // If a block_size is given, it specifies the size of the buffers
+ // that should be returned by Next(). Otherwise, a reasonable default
+ // is used.
+ explicit FileOutputStream(int file_descriptor, int block_size = -1);
+ ~FileOutputStream();
+
+ // Flushes any buffers and closes the underlying file. Returns false if
+ // an error occurs during the process; use GetErrno() to examine the error.
+ // Even if an error occurs, the file descriptor is closed when this returns.
+ bool Close();
+
+ // By default, the file descriptor is not closed when the stream is
+ // destroyed. Call SetCloseOnDelete(true) to change that. WARNING:
+ // This leaves no way for the caller to detect if close() fails. If
+ // detecting close() errors is important to you, you should arrange
+ // to close the descriptor yourself.
+ void SetCloseOnDelete(bool value) { copying_output_.SetCloseOnDelete(value); }
+
+ // If an I/O error has occurred on this file descriptor, this is the
+ // errno from that error. Otherwise, this is zero. Once an error
+ // occurs, the stream is broken and all subsequent operations will
+ // fail.
+ int GetErrno() { return copying_output_.GetErrno(); }
+
+ // implements ZeroCopyOutputStream ---------------------------------
+ bool Next(void** data, int* size);
+ void BackUp(int count);
+ int64 ByteCount() const;
+
+ private:
+ class LIBPROTOBUF_EXPORT CopyingFileOutputStream : public CopyingOutputStream {
+ public:
+ CopyingFileOutputStream(int file_descriptor);
+ ~CopyingFileOutputStream();
+
+ bool Close();
+ void SetCloseOnDelete(bool value) { close_on_delete_ = value; }
+ int GetErrno() { return errno_; }
+
+ // implements CopyingOutputStream --------------------------------
+ bool Write(const void* buffer, int size);
+
+ private:
+ // The file descriptor.
+ const int file_;
+ bool close_on_delete_;
+ bool is_closed_;
+
+ // The errno of the I/O error, if one has occurred. Otherwise, zero.
+ int errno_;
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingFileOutputStream);
+ };
+
+ CopyingFileOutputStream copying_output_;
+ CopyingOutputStreamAdaptor impl_;
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FileOutputStream);
+};
+
+// ===================================================================
+
+// A ZeroCopyInputStream which reads from a C++ istream.
+//
+// Note that for reading files (or anything represented by a file descriptor),
+// FileInputStream is more efficient.
+class LIBPROTOBUF_EXPORT IstreamInputStream : public ZeroCopyInputStream {
+ public:
+ // Creates a stream that reads from the given C++ istream.
+ // If a block_size is given, it specifies the number of bytes that
+ // should be read and returned with each call to Next(). Otherwise,
+ // a reasonable default is used.
+ explicit IstreamInputStream(istream* stream, int block_size = -1);
+ ~IstreamInputStream();
+
+ // implements ZeroCopyInputStream ----------------------------------
+ bool Next(const void** data, int* size);
+ void BackUp(int count);
+ bool Skip(int count);
+ int64 ByteCount() const;
+
+ private:
+ class LIBPROTOBUF_EXPORT CopyingIstreamInputStream : public CopyingInputStream {
+ public:
+ CopyingIstreamInputStream(istream* input);
+ ~CopyingIstreamInputStream();
+
+ // implements CopyingInputStream ---------------------------------
+ int Read(void* buffer, int size);
+ // (We use the default implementation of Skip().)
+
+ private:
+ // The stream.
+ istream* input_;
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingIstreamInputStream);
+ };
+
+ CopyingIstreamInputStream copying_input_;
+ CopyingInputStreamAdaptor impl_;
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(IstreamInputStream);
+};
+
+// ===================================================================
+
+// A ZeroCopyOutputStream which writes to a C++ ostream.
+//
+// Note that for writing files (or anything represented by a file descriptor),
+// FileOutputStream is more efficient.
+class LIBPROTOBUF_EXPORT OstreamOutputStream : public ZeroCopyOutputStream {
+ public:
+ // Creates a stream that writes to the given C++ ostream.
+ // If a block_size is given, it specifies the size of the buffers
+ // that should be returned by Next(). Otherwise, a reasonable default
+ // is used.
+ explicit OstreamOutputStream(ostream* stream, int block_size = -1);
+ ~OstreamOutputStream();
+
+ // implements ZeroCopyOutputStream ---------------------------------
+ bool Next(void** data, int* size);
+ void BackUp(int count);
+ int64 ByteCount() const;
+
+ private:
+ class LIBPROTOBUF_EXPORT CopyingOstreamOutputStream : public CopyingOutputStream {
+ public:
+ CopyingOstreamOutputStream(ostream* output);
+ ~CopyingOstreamOutputStream();
+
+ // implements CopyingOutputStream --------------------------------
+ bool Write(const void* buffer, int size);
+
+ private:
+ // The stream.
+ ostream* output_;
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingOstreamOutputStream);
+ };
+
+ CopyingOstreamOutputStream copying_output_;
+ CopyingOutputStreamAdaptor impl_;
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(OstreamOutputStream);
+};
+
+// ===================================================================
+
+// A ZeroCopyInputStream which reads from several other streams in sequence.
+// ConcatenatingInputStream is unable to distinguish between end-of-stream
+// and read errors in the underlying streams, so it assumes any errors mean
+// end-of-stream. So, if the underlying streams fail for any other reason,
+// ConcatenatingInputStream may do odd things. It is suggested that you do
+// not use ConcatenatingInputStream on streams that might produce read errors
+// other than end-of-stream.
+class LIBPROTOBUF_EXPORT ConcatenatingInputStream : public ZeroCopyInputStream {
+ public:
+ // All streams passed in as well as the array itself must remain valid
+ // until the ConcatenatingInputStream is destroyed.
+ ConcatenatingInputStream(ZeroCopyInputStream* const streams[], int count);
+ ~ConcatenatingInputStream();
+
+ // implements ZeroCopyInputStream ----------------------------------
+ bool Next(const void** data, int* size);
+ void BackUp(int count);
+ bool Skip(int count);
+ int64 ByteCount() const;
+
+
+ private:
+ // As streams are retired, streams_ is incremented and count_ is
+ // decremented.
+ ZeroCopyInputStream* const* streams_;
+ int stream_count_;
+ int64 bytes_retired_; // Bytes read from previous streams.
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ConcatenatingInputStream);
+};
+
+// ===================================================================
+
+// A ZeroCopyInputStream which wraps some other stream and limits it to
+// a particular byte count.
+class LIBPROTOBUF_EXPORT LimitingInputStream : public ZeroCopyInputStream {
+ public:
+ LimitingInputStream(ZeroCopyInputStream* input, int64 limit);
+ ~LimitingInputStream();
+
+ // implements ZeroCopyInputStream ----------------------------------
+ bool Next(const void** data, int* size);
+ void BackUp(int count);
+ bool Skip(int count);
+ int64 ByteCount() const;
+
+
+ private:
+ ZeroCopyInputStream* input_;
+ int64 limit_; // Decreases as we go, becomes negative if we overshoot.
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(LimitingInputStream);
+};
+
+// ===================================================================
+
+} // namespace io
+} // namespace protobuf
+
+} // namespace google
+#endif // GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_IMPL_H__
diff --git a/src/google/protobuf/io/zero_copy_stream_unittest.cc b/src/google/protobuf/io/zero_copy_stream_unittest.cc
new file mode 100644
index 00000000..c618041f
--- /dev/null
+++ b/src/google/protobuf/io/zero_copy_stream_unittest.cc
@@ -0,0 +1,443 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.
+// http://code.google.com/p/protobuf/
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+//
+// Testing strategy: For each type of I/O (array, string, file, etc.) we
+// create an output stream and write some data to it, then create a
+// corresponding input stream to read the same data back and expect it to
+// match. When the data is written, it is written in several small chunks
+// of varying sizes, with a BackUp() after each chunk. It is read back
+// similarly, but with chunks separated at different points. The whole
+// process is run with a variety of block sizes for both the input and
+// the output.
+//
+// TODO(kenton): Rewrite this test to bring it up to the standards of all
+// the other proto2 tests. May want to wait for gTest to implement
+// "parametized tests" so that one set of tests can be used on all the
+// implementations.
+
+#ifdef _MSC_VER
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <sstream>
+
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+
+#include <google/protobuf/stubs/common.h>
+#include <google/protobuf/testing/googletest.h>
+#include <gtest/gtest.h>
+
+namespace google {
+namespace protobuf {
+namespace io {
+namespace {
+
+#ifdef _WIN32
+#define pipe(fds) _pipe(fds, 4096, O_BINARY)
+#endif
+
+#ifndef O_BINARY
+#ifdef _O_BINARY
+#define O_BINARY _O_BINARY
+#else
+#define O_BINARY 0 // If this isn't defined, the platform doesn't need it.
+#endif
+#endif
+
+class IoTest : public testing::Test {
+ protected:
+ // Test helpers.
+
+ // Helper to write an array of data to an output stream.
+ bool WriteToOutput(ZeroCopyOutputStream* output, const void* data, int size);
+ // Helper to read a fixed-length array of data from an input stream.
+ int ReadFromInput(ZeroCopyInputStream* input, void* data, int size);
+ // Write a string to the output stream.
+ void WriteString(ZeroCopyOutputStream* output, const char* str);
+ // Read a number of bytes equal to the size of the given string and checks
+ // that it matches the string.
+ void ReadString(ZeroCopyInputStream* input, const char* str);
+ // Writes some text to the output stream in a particular order. Returns
+ // the number of bytes written, incase the caller needs that to set up an
+ // input stream.
+ int WriteStuff(ZeroCopyOutputStream* output);
+ // Reads text from an input stream and expects it to match what
+ // WriteStuff() writes.
+ void ReadStuff(ZeroCopyInputStream* input);
+
+ static const int kBlockSizes[];
+ static const int kBlockSizeCount;
+};
+
+const int IoTest::kBlockSizes[] = {-1, 1, 2, 5, 7, 10, 23, 64};
+const int IoTest::kBlockSizeCount = GOOGLE_ARRAYSIZE(IoTest::kBlockSizes);
+
+bool IoTest::WriteToOutput(ZeroCopyOutputStream* output,
+ const void* data, int size) {
+ const uint8* in = reinterpret_cast<const uint8*>(data);
+ int in_size = size;
+
+ void* out;
+ int out_size;
+
+ while (true) {
+ if (!output->Next(&out, &out_size)) {
+ return false;
+ }
+
+ if (in_size <= out_size) {
+ memcpy(out, in, in_size);
+ output->BackUp(out_size - in_size);
+ return true;
+ }
+
+ memcpy(out, in, out_size);
+ in += out_size;
+ in_size -= out_size;
+ }
+}
+
+int IoTest::ReadFromInput(ZeroCopyInputStream* input, void* data, int size) {
+ uint8* out = reinterpret_cast<uint8*>(data);
+ int out_size = size;
+
+ const void* in;
+ int in_size = 0;
+
+ while (true) {
+ if (!input->Next(&in, &in_size)) {
+ return size - out_size;
+ }
+
+ if (out_size <= in_size) {
+ memcpy(out, in, out_size);
+ input->BackUp(in_size - out_size);
+ return size; // Copied all of it.
+ }
+
+ memcpy(out, in, in_size);
+ out += in_size;
+ out_size -= in_size;
+ }
+}
+
+void IoTest::WriteString(ZeroCopyOutputStream* output, const char* str) {
+ EXPECT_TRUE(WriteToOutput(output, str, strlen(str)));
+}
+
+void IoTest::ReadString(ZeroCopyInputStream* input, const char* str) {
+ int length = strlen(str);
+ scoped_array<char> buffer(new char[length + 1]);
+ buffer[length] = '\0';
+ EXPECT_EQ(ReadFromInput(input, buffer.get(), length), length);
+ EXPECT_STREQ(str, buffer.get());
+}
+
+int IoTest::WriteStuff(ZeroCopyOutputStream* output) {
+ WriteString(output, "Hello world!\n");
+ WriteString(output, "Some te");
+ WriteString(output, "xt. Blah blah.");
+ WriteString(output, "abcdefg");
+ WriteString(output, "01234567890123456789");
+ WriteString(output, "foobar");
+
+ EXPECT_EQ(output->ByteCount(), 68);
+
+ int result = output->ByteCount();
+ return result;
+}
+
+// Reads text from an input stream and expects it to match what WriteStuff()
+// writes.
+void IoTest::ReadStuff(ZeroCopyInputStream* input) {
+ ReadString(input, "Hello world!\n");
+ ReadString(input, "Some text. ");
+ ReadString(input, "Blah ");
+ ReadString(input, "blah.");
+ ReadString(input, "abcdefg");
+ EXPECT_TRUE(input->Skip(20));
+ ReadString(input, "foo");
+ ReadString(input, "bar");
+
+ EXPECT_EQ(input->ByteCount(), 68);
+
+ uint8 byte;
+ EXPECT_EQ(ReadFromInput(input, &byte, 1), 0);
+}
+
+// ===================================================================
+
+TEST_F(IoTest, ArrayIo) {
+ const int kBufferSize = 256;
+ uint8 buffer[kBufferSize];
+
+ for (int i = 0; i < kBlockSizeCount; i++) {
+ for (int j = 0; j < kBlockSizeCount; j++) {
+ int size;
+ {
+ ArrayOutputStream output(buffer, kBufferSize, kBlockSizes[i]);
+ size = WriteStuff(&output);
+ }
+ {
+ ArrayInputStream input(buffer, size, kBlockSizes[j]);
+ ReadStuff(&input);
+ }
+ }
+ }
+}
+
+// There is no string input, only string output. Also, it doesn't support
+// explicit block sizes. So, we'll only run one test and we'll use
+// ArrayInput to read back the results.
+TEST_F(IoTest, StringIo) {
+ string str;
+ {
+ StringOutputStream output(&str);
+ WriteStuff(&output);
+ }
+ {
+ ArrayInputStream input(str.data(), str.size());
+ ReadStuff(&input);
+ }
+}
+
+
+// To test files, we create a temporary file, write, read, truncate, repeat.
+TEST_F(IoTest, FileIo) {
+ string filename = TestTempDir() + "/zero_copy_stream_test_file";
+
+ for (int i = 0; i < kBlockSizeCount; i++) {
+ for (int j = 0; j < kBlockSizeCount; j++) {
+ // Make a temporary file.
+ int file =
+ open(filename.c_str(), O_RDWR | O_CREAT | O_TRUNC | O_BINARY, 0777);
+ ASSERT_GE(file, 0);
+
+ {
+ FileOutputStream output(file, kBlockSizes[i]);
+ WriteStuff(&output);
+ EXPECT_EQ(0, output.GetErrno());
+ }
+
+ // Rewind.
+ ASSERT_NE(lseek(file, 0, SEEK_SET), (off_t)-1);
+
+ {
+ FileInputStream input(file, kBlockSizes[j]);
+ ReadStuff(&input);
+ EXPECT_EQ(0, input.GetErrno());
+ }
+
+ close(file);
+ }
+ }
+}
+
+// MSVC raises various debugging exceptions if we try to use a file
+// descriptor of -1, defeating our tests below. This class will disable
+// these debug assertions while in scope.
+class MsvcDebugDisabler {
+ public:
+#ifdef _MSC_VER
+ MsvcDebugDisabler() {
+ old_handler_ = _set_invalid_parameter_handler(MyHandler);
+ old_mode_ = _CrtSetReportMode(_CRT_ASSERT, 0);
+ }
+ ~MsvcDebugDisabler() {
+ old_handler_ = _set_invalid_parameter_handler(old_handler_);
+ old_mode_ = _CrtSetReportMode(_CRT_ASSERT, old_mode_);
+ }
+
+ static void MyHandler(const wchar_t *expr,
+ const wchar_t *func,
+ const wchar_t *file,
+ unsigned int line,
+ uintptr_t pReserved) {
+ // do nothing
+ }
+
+ _invalid_parameter_handler old_handler_;
+ int old_mode_;
+#else
+ // Dummy constructor and destructor to ensure that GCC doesn't complain
+ // that debug_disabler is an unused variable.
+ MsvcDebugDisabler() {}
+ ~MsvcDebugDisabler() {}
+#endif
+};
+
+// Test that FileInputStreams report errors correctly.
+TEST_F(IoTest, FileReadError) {
+ MsvcDebugDisabler debug_disabler;
+
+ // -1 = invalid file descriptor.
+ FileInputStream input(-1);
+
+ const void* buffer;
+ int size;
+ EXPECT_FALSE(input.Next(&buffer, &size));
+ EXPECT_EQ(EBADF, input.GetErrno());
+}
+
+// Test that FileOutputStreams report errors correctly.
+TEST_F(IoTest, FileWriteError) {
+ MsvcDebugDisabler debug_disabler;
+
+ // -1 = invalid file descriptor.
+ FileOutputStream input(-1);
+
+ void* buffer;
+ int size;
+
+ // The first call to Next() succeeds because it doesn't have anything to
+ // write yet.
+ EXPECT_TRUE(input.Next(&buffer, &size));
+
+ // Second call fails.
+ EXPECT_FALSE(input.Next(&buffer, &size));
+
+ EXPECT_EQ(EBADF, input.GetErrno());
+}
+
+// Pipes are not seekable, so File{Input,Output}Stream ends up doing some
+// different things to handle them. We'll test by writing to a pipe and
+// reading back from it.
+TEST_F(IoTest, PipeIo) {
+ int files[2];
+
+ for (int i = 0; i < kBlockSizeCount; i++) {
+ for (int j = 0; j < kBlockSizeCount; j++) {
+ // Need to create a new pipe each time because ReadStuff() expects
+ // to see EOF at the end.
+ ASSERT_EQ(pipe(files), 0);
+
+ {
+ FileOutputStream output(files[1], kBlockSizes[i]);
+ WriteStuff(&output);
+ EXPECT_EQ(0, output.GetErrno());
+ }
+ close(files[1]); // Send EOF.
+
+ {
+ FileInputStream input(files[0], kBlockSizes[j]);
+ ReadStuff(&input);
+ EXPECT_EQ(0, input.GetErrno());
+ }
+ close(files[0]);
+ }
+ }
+}
+
+// Test using C++ iostreams.
+TEST_F(IoTest, IostreamIo) {
+ for (int i = 0; i < kBlockSizeCount; i++) {
+ for (int j = 0; j < kBlockSizeCount; j++) {
+ stringstream stream;
+
+ {
+ OstreamOutputStream output(&stream, kBlockSizes[i]);
+ WriteStuff(&output);
+ EXPECT_FALSE(stream.fail());
+ }
+
+ {
+ IstreamInputStream input(&stream, kBlockSizes[j]);
+ ReadStuff(&input);
+ EXPECT_TRUE(stream.eof());
+ }
+ }
+ }
+}
+
+// To test ConcatenatingInputStream, we create several ArrayInputStreams
+// covering a buffer and then concatenate them.
+TEST_F(IoTest, ConcatenatingInputStream) {
+ const int kBufferSize = 256;
+ uint8 buffer[kBufferSize];
+
+ // Fill the buffer.
+ ArrayOutputStream output(buffer, kBufferSize);
+ WriteStuff(&output);
+
+ // Now split it up into multiple streams of varying sizes.
+ ASSERT_EQ(68, output.ByteCount()); // Test depends on this.
+ ArrayInputStream input1(buffer , 12);
+ ArrayInputStream input2(buffer + 12, 7);
+ ArrayInputStream input3(buffer + 19, 6);
+ ArrayInputStream input4(buffer + 25, 15);
+ ArrayInputStream input5(buffer + 40, 0);
+ // Note: We want to make sure we have a stream boundary somewhere between
+ // bytes 42 and 62, which is the range that it Skip()ed by ReadStuff(). This
+ // tests that a bug that existed in the original code for Skip() is fixed.
+ ArrayInputStream input6(buffer + 40, 10);
+ ArrayInputStream input7(buffer + 50, 18); // Total = 68 bytes.
+
+ ZeroCopyInputStream* streams[] =
+ {&input1, &input2, &input3, &input4, &input5, &input6, &input7};
+
+ // Create the concatenating stream and read.
+ ConcatenatingInputStream input(streams, GOOGLE_ARRAYSIZE(streams));
+ ReadStuff(&input);
+}
+
+// To test LimitingInputStream, we write our golden text to a buffer, then
+// create an ArrayInputStream that contains the whole buffer (not just the
+// bytes written), then use a LimitingInputStream to limit it just to the
+// bytes written.
+TEST_F(IoTest, LimitingInputStream) {
+ const int kBufferSize = 256;
+ uint8 buffer[kBufferSize];
+
+ // Fill the buffer.
+ ArrayOutputStream output(buffer, kBufferSize);
+ WriteStuff(&output);
+
+ // Set up input.
+ ArrayInputStream array_input(buffer, kBufferSize);
+ LimitingInputStream input(&array_input, output.ByteCount());
+
+ ReadStuff(&input);
+}
+
+// Check that a zero-size array doesn't confuse the code.
+TEST(ZeroSizeArray, Input) {
+ ArrayInputStream input(NULL, 0);
+ const void* data;
+ int size;
+ EXPECT_FALSE(input.Next(&data, &size));
+}
+
+TEST(ZeroSizeArray, Output) {
+ ArrayOutputStream output(NULL, 0);
+ void* data;
+ int size;
+ EXPECT_FALSE(output.Next(&data, &size));
+}
+
+} // namespace
+} // namespace io
+} // namespace protobuf
+} // namespace google