diff options
Diffstat (limited to 'src/google/protobuf/io')
-rw-r--r-- | src/google/protobuf/io/coded_stream.cc | 757 | ||||
-rw-r--r-- | src/google/protobuf/io/coded_stream.h | 592 | ||||
-rw-r--r-- | src/google/protobuf/io/coded_stream_unittest.cc | 929 | ||||
-rw-r--r-- | src/google/protobuf/io/package_info.h | 40 | ||||
-rw-r--r-- | src/google/protobuf/io/printer.cc | 165 | ||||
-rw-r--r-- | src/google/protobuf/io/printer.h | 109 | ||||
-rw-r--r-- | src/google/protobuf/io/printer_unittest.cc | 210 | ||||
-rw-r--r-- | src/google/protobuf/io/tokenizer.cc | 679 | ||||
-rw-r--r-- | src/google/protobuf/io/tokenizer.h | 276 | ||||
-rw-r--r-- | src/google/protobuf/io/tokenizer_unittest.cc | 706 | ||||
-rw-r--r-- | src/google/protobuf/io/zero_copy_stream.cc | 34 | ||||
-rw-r--r-- | src/google/protobuf/io/zero_copy_stream.h | 224 | ||||
-rw-r--r-- | src/google/protobuf/io/zero_copy_stream_impl.cc | 793 | ||||
-rw-r--r-- | src/google/protobuf/io/zero_copy_stream_impl.h | 617 | ||||
-rw-r--r-- | src/google/protobuf/io/zero_copy_stream_unittest.cc | 443 |
15 files changed, 6574 insertions, 0 deletions
diff --git a/src/google/protobuf/io/coded_stream.cc b/src/google/protobuf/io/coded_stream.cc new file mode 100644 index 00000000..58c44dc1 --- /dev/null +++ b/src/google/protobuf/io/coded_stream.cc @@ -0,0 +1,757 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. +// http://code.google.com/p/protobuf/ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// This implementation is heavily optimized to make reads and writes +// of small values (especially varints) as fast as possible. In +// particular, we optimize for the common case that a read or a write +// will not cross the end of the buffer, since we can avoid a lot +// of branching in this case. + +#include <stack> +#include <limits.h> +#include <google/protobuf/io/coded_stream.h> +#include <google/protobuf/io/zero_copy_stream.h> +#include <google/protobuf/stubs/common.h> +#include <google/protobuf/stubs/stl_util-inl.h> + + +namespace google { +namespace protobuf { +namespace io { + +namespace { + +static const int kDefaultTotalBytesLimit = 64 << 20; // 64MB + +static const int kDefaultTotalBytesWarningThreshold = 32 << 20; // 32MB +static const int kDefaultRecursionLimit = 64; + +static const int kMaxVarintBytes = 10; +static const int kMaxVarint32Bytes = 5; + + +} // namespace + +// CodedInputStream ================================================== + +CodedInputStream::CodedInputStream(ZeroCopyInputStream* input) + : input_(input), + buffer_(NULL), + buffer_size_(0), + total_bytes_read_(0), + overflow_bytes_(0), + + last_tag_(0), + legitimate_message_end_(false), + + aliasing_enabled_(false), + + current_limit_(INT_MAX), + buffer_size_after_limit_(0), + + total_bytes_limit_(kDefaultTotalBytesLimit), + total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold), + + recursion_depth_(0), + recursion_limit_(kDefaultRecursionLimit) { +} + +CodedInputStream::~CodedInputStream() { + int backup_bytes = buffer_size_ + buffer_size_after_limit_ + overflow_bytes_; + if (backup_bytes > 0) { + // We still have bytes left over from the last buffer. Back up over + // them. + input_->BackUp(backup_bytes); + } +} + + +inline void CodedInputStream::RecomputeBufferLimits() { + buffer_size_ += buffer_size_after_limit_; + int closest_limit = min(current_limit_, total_bytes_limit_); + if (closest_limit < total_bytes_read_) { + // The limit position is in the current buffer. We must adjust + // the buffer size accordingly. + buffer_size_after_limit_ = total_bytes_read_ - closest_limit; + buffer_size_ -= buffer_size_after_limit_; + } else { + buffer_size_after_limit_ = 0; + } +} + +CodedInputStream::Limit CodedInputStream::PushLimit(int byte_limit) { + // Current position relative to the beginning of the stream. + int current_position = total_bytes_read_ - + (buffer_size_ + buffer_size_after_limit_); + + Limit old_limit = current_limit_; + + // security: byte_limit is possibly evil, so check for negative values + // and overflow. + if (byte_limit >= 0 && + byte_limit <= INT_MAX - current_position) { + current_limit_ = current_position + byte_limit; + } else { + // Negative or overflow. + current_limit_ = INT_MAX; + } + + // We need to enforce all limits, not just the new one, so if the previous + // limit was before the new requested limit, we continue to enforce the + // previous limit. + current_limit_ = min(current_limit_, old_limit); + + RecomputeBufferLimits(); + return old_limit; +} + +void CodedInputStream::PopLimit(Limit limit) { + // The limit passed in is actually the *old* limit, which we returned from + // PushLimit(). + current_limit_ = limit; + RecomputeBufferLimits(); + + // We may no longer be at a legitimate message end. ReadTag() needs to be + // called again to find out. + legitimate_message_end_ = false; +} + +int CodedInputStream::BytesUntilLimit() { + if (current_limit_ == INT_MAX) return -1; + int current_position = total_bytes_read_ - + (buffer_size_ + buffer_size_after_limit_); + + return current_limit_ - current_position; +} + +void CodedInputStream::SetTotalBytesLimit( + int total_bytes_limit, int warning_threshold) { + // Make sure the limit isn't already past, since this could confuse other + // code. + int current_position = total_bytes_read_ - + (buffer_size_ + buffer_size_after_limit_); + total_bytes_limit_ = max(current_position, total_bytes_limit); + total_bytes_warning_threshold_ = warning_threshold; + RecomputeBufferLimits(); +} + +void CodedInputStream::PrintTotalBytesLimitError() { + GOOGLE_LOG(ERROR) << "A protocol message was rejected because it was too " + "big (more than " << total_bytes_limit_ + << " bytes). To increase the limit (or to disable these " + "warnings), see CodedInputStream::SetTotalBytesLimit() " + "in google/protobuf/io/coded_stream.h."; +} + +bool CodedInputStream::Skip(int count) { + if (count < 0) return false; // security: count is often user-supplied + + if (count <= buffer_size_) { + // Just skipping within the current buffer. Easy. + Advance(count); + return true; + } + + if (buffer_size_after_limit_ > 0) { + // We hit a limit inside this buffer. Advance to the limit and fail. + Advance(buffer_size_); + return false; + } + + count -= buffer_size_; + buffer_ = NULL; + buffer_size_ = 0; + + // Make sure this skip doesn't try to skip past the current limit. + int closest_limit = min(current_limit_, total_bytes_limit_); + int bytes_until_limit = closest_limit - total_bytes_read_; + if (bytes_until_limit < count) { + // We hit the limit. Skip up to it then fail. + total_bytes_read_ = closest_limit; + input_->Skip(bytes_until_limit); + return false; + } + + total_bytes_read_ += count; + return input_->Skip(count); +} + +bool CodedInputStream::ReadRaw(void* buffer, int size) { + while (buffer_size_ < size) { + // Reading past end of buffer. Copy what we have, then refresh. + memcpy(buffer, buffer_, buffer_size_); + buffer = reinterpret_cast<uint8*>(buffer) + buffer_size_; + size -= buffer_size_; + if (!Refresh()) return false; + } + + memcpy(buffer, buffer_, size); + Advance(size); + + return true; +} + +bool CodedInputStream::ReadString(string* buffer, int size) { + if (size < 0) return false; // security: size is often user-supplied + + if (!buffer->empty()) { + buffer->clear(); + } + + if (size < buffer_size_) { + STLStringResizeUninitialized(buffer, size); + memcpy((uint8*)buffer->data(), buffer_, size); + Advance(size); + return true; + } + + while (buffer_size_ < size) { + // Some STL implementations "helpfully" crash on buffer->append(NULL, 0). + if (buffer_size_ != 0) { + // Note: string1.append(string2) is O(string2.size()) (as opposed to + // O(string1.size() + string2.size()), which would be bad). + buffer->append(reinterpret_cast<const char*>(buffer_), buffer_size_); + } + size -= buffer_size_; + if (!Refresh()) return false; + } + + buffer->append(reinterpret_cast<const char*>(buffer_), size); + Advance(size); + + return true; +} + + +bool CodedInputStream::ReadLittleEndian32(uint32* value) { + uint8 bytes[sizeof(*value)]; + + const uint8* ptr; + if (buffer_size_ >= sizeof(*value)) { + // Fast path: Enough bytes in the buffer to read directly. + ptr = buffer_; + Advance(sizeof(*value)); + } else { + // Slow path: Had to read past the end of the buffer. + if (!ReadRaw(bytes, sizeof(*value))) return false; + ptr = bytes; + } + + *value = (static_cast<uint32>(ptr[0]) ) | + (static_cast<uint32>(ptr[1]) << 8) | + (static_cast<uint32>(ptr[2]) << 16) | + (static_cast<uint32>(ptr[3]) << 24); + return true; +} + +bool CodedInputStream::ReadLittleEndian64(uint64* value) { + uint8 bytes[sizeof(*value)]; + + const uint8* ptr; + if (buffer_size_ >= sizeof(*value)) { + // Fast path: Enough bytes in the buffer to read directly. + ptr = buffer_; + Advance(sizeof(*value)); + } else { + // Slow path: Had to read past the end of the buffer. + if (!ReadRaw(bytes, sizeof(*value))) return false; + ptr = bytes; + } + + uint32 part0 = (static_cast<uint32>(ptr[0]) ) | + (static_cast<uint32>(ptr[1]) << 8) | + (static_cast<uint32>(ptr[2]) << 16) | + (static_cast<uint32>(ptr[3]) << 24); + uint32 part1 = (static_cast<uint32>(ptr[4]) ) | + (static_cast<uint32>(ptr[5]) << 8) | + (static_cast<uint32>(ptr[6]) << 16) | + (static_cast<uint32>(ptr[7]) << 24); + *value = static_cast<uint64>(part0) | + (static_cast<uint64>(part1) << 32); + return true; +} + +bool CodedInputStream::ReadVarint32Fallback(uint32* value) { + if (buffer_size_ >= kMaxVarintBytes || + // Optimization: If the varint ends at exactly the end of the buffer, + // we can detect that and still use the fast path. + (buffer_size_ != 0 && !(buffer_[buffer_size_-1] & 0x80))) { + // Fast path: We have enough bytes left in the buffer to guarantee that + // this read won't cross the end, so we can skip the checks. + const uint8* ptr = buffer_; + uint32 b; + uint32 result; + + b = *(ptr++); result = (b & 0x7F) ; if (!(b & 0x80)) goto done; + b = *(ptr++); result |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done; + b = *(ptr++); result |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done; + b = *(ptr++); result |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done; + b = *(ptr++); result |= b << 28; if (!(b & 0x80)) goto done; + + // If the input is larger than 32 bits, we still need to read it all + // and discard the high-order bits. + for (int i = 0; i < kMaxVarintBytes - kMaxVarint32Bytes; i++) { + b = *(ptr++); if (!(b & 0x80)) goto done; + } + + // We have overrun the maximum size of a varint (10 bytes). Assume + // the data is corrupt. + return false; + + done: + Advance(ptr - buffer_); + *value = result; + return true; + + } else { + // Optimization: If we're at a limit, detect that quickly. (This is + // common when reading tags.) + while (buffer_size_ == 0) { + // Detect cases where we definitely hit a byte limit without calling + // Refresh(). + if (// If we hit a limit, buffer_size_after_limit_ will be non-zero. + buffer_size_after_limit_ > 0 && + // Make sure that the limit we hit is not total_bytes_limit_, since + // in that case we still need to call Refresh() so that it prints an + // error. + total_bytes_read_ - buffer_size_after_limit_ < total_bytes_limit_) { + // We hit a byte limit. + legitimate_message_end_ = true; + return false; + } + + // Call refresh. + if (!Refresh()) { + // Refresh failed. Make sure that it failed due to EOF, not because + // we hit total_bytes_limit_, which, unlike normal limits, is not a + // valid place to end a message. + int current_position = total_bytes_read_ - buffer_size_after_limit_; + if (current_position >= total_bytes_limit_) { + // Hit total_bytes_limit_. But if we also hit the normal limit, + // we're still OK. + legitimate_message_end_ = current_limit_ == total_bytes_limit_; + } else { + legitimate_message_end_ = true; + } + return false; + } + } + + // Slow path: Just do a 64-bit read. + uint64 result; + if (!ReadVarint64(&result)) return false; + *value = (uint32)result; + return true; + } +} + +bool CodedInputStream::ReadVarint64(uint64* value) { + if (buffer_size_ >= kMaxVarintBytes || + // Optimization: If the varint ends at exactly the end of the buffer, + // we can detect that and still use the fast path. + (buffer_size_ != 0 && !(buffer_[buffer_size_-1] & 0x80))) { + // Fast path: We have enough bytes left in the buffer to guarantee that + // this read won't cross the end, so we can skip the checks. + + const uint8* ptr = buffer_; + uint32 b; + + // Splitting into 32-bit pieces gives better performance on 32-bit + // processors. + uint32 part0 = 0, part1 = 0, part2 = 0; + + b = *(ptr++); part0 = (b & 0x7F) ; if (!(b & 0x80)) goto done; + b = *(ptr++); part0 |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done; + b = *(ptr++); part0 |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done; + b = *(ptr++); part0 |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done; + b = *(ptr++); part1 = (b & 0x7F) ; if (!(b & 0x80)) goto done; + b = *(ptr++); part1 |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done; + b = *(ptr++); part1 |= (b & 0x7F) << 14; if (!(b & 0x80)) goto done; + b = *(ptr++); part1 |= (b & 0x7F) << 21; if (!(b & 0x80)) goto done; + b = *(ptr++); part2 = (b & 0x7F) ; if (!(b & 0x80)) goto done; + b = *(ptr++); part2 |= (b & 0x7F) << 7; if (!(b & 0x80)) goto done; + + // We have overrun the maximum size of a varint (10 bytes). The data + // must be corrupt. + return false; + + done: + Advance(ptr - buffer_); + *value = (static_cast<uint64>(part0) ) | + (static_cast<uint64>(part1) << 28) | + (static_cast<uint64>(part2) << 56); + return true; + + } else { + // Slow path: This read might cross the end of the buffer, so we + // need to check and refresh the buffer if and when it does. + + uint64 result = 0; + int count = 0; + uint32 b; + + do { + if (count == kMaxVarintBytes) return false; + while (buffer_size_ == 0) { + if (!Refresh()) return false; + } + b = *buffer_; + result |= static_cast<uint64>(b & 0x7F) << (7 * count); + Advance(1); + ++count; + } while(b & 0x80); + + *value = result; + return true; + } +} + +bool CodedInputStream::Refresh() { + if (buffer_size_after_limit_ > 0 || overflow_bytes_ > 0) { + // We've hit a limit. Stop. + buffer_ += buffer_size_; + buffer_size_ = 0; + + int current_position = total_bytes_read_ - buffer_size_after_limit_; + + if (current_position >= total_bytes_limit_ && + total_bytes_limit_ != current_limit_) { + // Hit total_bytes_limit_. + PrintTotalBytesLimitError(); + } + + return false; + } + + if (total_bytes_warning_threshold_ >= 0 && + total_bytes_read_ >= total_bytes_warning_threshold_) { + GOOGLE_LOG(WARNING) << "Reading dangerously large protocol message. If the " + "message turns out to be larger than " + << total_bytes_limit_ << " bytes, parsing will be halted " + "for security reasons. To increase the limit (or to " + "disable these warnings), see " + "CodedInputStream::SetTotalBytesLimit() in " + "google/protobuf/io/coded_stream.h."; + + // Don't warn again for this stream. + total_bytes_warning_threshold_ = -1; + } + + const void* void_buffer; + if (input_->Next(&void_buffer, &buffer_size_)) { + buffer_ = reinterpret_cast<const uint8*>(void_buffer); + GOOGLE_CHECK_GE(buffer_size_, 0); + + if (total_bytes_read_ <= INT_MAX - buffer_size_) { + total_bytes_read_ += buffer_size_; + } else { + // Overflow. Reset buffer_size_ to not include the bytes beyond INT_MAX. + // We can't get that far anyway, because total_bytes_limit_ is guaranteed + // to be less than it. We need to keep track of the number of bytes + // we discarded, though, so that we can call input_->BackUp() to back + // up over them on destruction. + + // The following line is equivalent to: + // overflow_bytes_ = total_bytes_read_ + buffer_size_ - INT_MAX; + // except that it avoids overflows. Signed integer overflow has + // undefined results according to the C standard. + overflow_bytes_ = total_bytes_read_ - (INT_MAX - buffer_size_); + buffer_size_ -= overflow_bytes_; + total_bytes_read_ = INT_MAX; + } + + RecomputeBufferLimits(); + return true; + } else { + buffer_ = NULL; + buffer_size_ = 0; + return false; + } +} + +// CodedOutputStream ================================================= + +CodedOutputStream::CodedOutputStream(ZeroCopyOutputStream* output) + : output_(output), + buffer_(NULL), + buffer_size_(0), + total_bytes_(0) { +} + +CodedOutputStream::~CodedOutputStream() { + if (buffer_size_ > 0) { + output_->BackUp(buffer_size_); + } +} + +bool CodedOutputStream::WriteRaw(const void* data, int size) { + while (buffer_size_ < size) { + memcpy(buffer_, data, buffer_size_); + size -= buffer_size_; + data = reinterpret_cast<const uint8*>(data) + buffer_size_; + if (!Refresh()) return false; + } + + memcpy(buffer_, data, size); + Advance(size); + return true; +} + + +bool CodedOutputStream::WriteLittleEndian32(uint32 value) { + uint8 bytes[sizeof(value)]; + + bool use_fast = buffer_size_ >= sizeof(value); + uint8* ptr = use_fast ? buffer_ : bytes; + + ptr[0] = static_cast<uint8>(value ); + ptr[1] = static_cast<uint8>(value >> 8); + ptr[2] = static_cast<uint8>(value >> 16); + ptr[3] = static_cast<uint8>(value >> 24); + + if (use_fast) { + Advance(sizeof(value)); + return true; + } else { + return WriteRaw(bytes, sizeof(value)); + } +} + +bool CodedOutputStream::WriteLittleEndian64(uint64 value) { + uint8 bytes[sizeof(value)]; + + uint32 part0 = static_cast<uint32>(value); + uint32 part1 = static_cast<uint32>(value >> 32); + + bool use_fast = buffer_size_ >= sizeof(value); + uint8* ptr = use_fast ? buffer_ : bytes; + + ptr[0] = static_cast<uint8>(part0 ); + ptr[1] = static_cast<uint8>(part0 >> 8); + ptr[2] = static_cast<uint8>(part0 >> 16); + ptr[3] = static_cast<uint8>(part0 >> 24); + ptr[4] = static_cast<uint8>(part1 ); + ptr[5] = static_cast<uint8>(part1 >> 8); + ptr[6] = static_cast<uint8>(part1 >> 16); + ptr[7] = static_cast<uint8>(part1 >> 24); + + if (use_fast) { + Advance(sizeof(value)); + return true; + } else { + return WriteRaw(bytes, sizeof(value)); + } +} + +bool CodedOutputStream::WriteVarint32Fallback(uint32 value) { + if (buffer_size_ >= kMaxVarint32Bytes) { + // Fast path: We have enough bytes left in the buffer to guarantee that + // this write won't cross the end, so we can skip the checks. + uint8* target = buffer_; + + target[0] = static_cast<uint8>(value | 0x80); + if (value >= (1 << 7)) { + target[1] = static_cast<uint8>((value >> 7) | 0x80); + if (value >= (1 << 14)) { + target[2] = static_cast<uint8>((value >> 14) | 0x80); + if (value >= (1 << 21)) { + target[3] = static_cast<uint8>((value >> 21) | 0x80); + if (value >= (1 << 28)) { + target[4] = static_cast<uint8>(value >> 28); + Advance(5); + } else { + target[3] &= 0x7F; + Advance(4); + } + } else { + target[2] &= 0x7F; + Advance(3); + } + } else { + target[1] &= 0x7F; + Advance(2); + } + } else { + target[0] &= 0x7F; + Advance(1); + } + + return true; + } else { + // Slow path: This write might cross the end of the buffer, so we + // compose the bytes first then use WriteRaw(). + uint8 bytes[kMaxVarint32Bytes]; + int size = 0; + while (value > 0x7F) { + bytes[size++] = (static_cast<uint8>(value) & 0x7F) | 0x80; + value >>= 7; + } + bytes[size++] = static_cast<uint8>(value) & 0x7F; + return WriteRaw(bytes, size); + } +} + +bool CodedOutputStream::WriteVarint64(uint64 value) { + if (buffer_size_ >= kMaxVarintBytes) { + // Fast path: We have enough bytes left in the buffer to guarantee that + // this write won't cross the end, so we can skip the checks. + uint8* target = buffer_; + + // Splitting into 32-bit pieces gives better performance on 32-bit + // processors. + uint32 part0 = static_cast<uint32>(value ); + uint32 part1 = static_cast<uint32>(value >> 28); + uint32 part2 = static_cast<uint32>(value >> 56); + + int size; + + // Here we can't really optimize for small numbers, since the value is + // split into three parts. Cheking for numbers < 128, for instance, + // would require three comparisons, since you'd have to make sure part1 + // and part2 are zero. However, if the caller is using 64-bit integers, + // it is likely that they expect the numbers to often be very large, so + // we probably don't want to optimize for small numbers anyway. Thus, + // we end up with a hardcoded binary search tree... + if (part2 == 0) { + if (part1 == 0) { + if (part0 < (1 << 14)) { + if (part0 < (1 << 7)) { + size = 1; goto size1; + } else { + size = 2; goto size2; + } + } else { + if (part0 < (1 << 21)) { + size = 3; goto size3; + } else { + size = 4; goto size4; + } + } + } else { + if (part1 < (1 << 14)) { + if (part1 < (1 << 7)) { + size = 5; goto size5; + } else { + size = 6; goto size6; + } + } else { + if (part1 < (1 << 21)) { + size = 7; goto size7; + } else { + size = 8; goto size8; + } + } + } + } else { + if (part2 < (1 << 7)) { + size = 9; goto size9; + } else { + size = 10; goto size10; + } + } + + GOOGLE_LOG(FATAL) << "Can't get here."; + + size10: target[9] = static_cast<uint8>((part2 >> 7) | 0x80); + size9 : target[8] = static_cast<uint8>((part2 ) | 0x80); + size8 : target[7] = static_cast<uint8>((part1 >> 21) | 0x80); + size7 : target[6] = static_cast<uint8>((part1 >> 14) | 0x80); + size6 : target[5] = static_cast<uint8>((part1 >> 7) | 0x80); + size5 : target[4] = static_cast<uint8>((part1 ) | 0x80); + size4 : target[3] = static_cast<uint8>((part0 >> 21) | 0x80); + size3 : target[2] = static_cast<uint8>((part0 >> 14) | 0x80); + size2 : target[1] = static_cast<uint8>((part0 >> 7) | 0x80); + size1 : target[0] = static_cast<uint8>((part0 ) | 0x80); + + target[size-1] &= 0x7F; + Advance(size); + return true; + } else { + // Slow path: This write might cross the end of the buffer, so we + // compose the bytes first then use WriteRaw(). + uint8 bytes[kMaxVarintBytes]; + int size = 0; + while (value > 0x7F) { + bytes[size++] = (static_cast<uint8>(value) & 0x7F) | 0x80; + value >>= 7; + } + bytes[size++] = static_cast<uint8>(value) & 0x7F; + return WriteRaw(bytes, size); + } +} + +bool CodedOutputStream::Refresh() { + void* void_buffer; + if (output_->Next(&void_buffer, &buffer_size_)) { + buffer_ = reinterpret_cast<uint8*>(void_buffer); + total_bytes_ += buffer_size_; + return true; + } else { + buffer_ = NULL; + buffer_size_ = 0; + return false; + } +} + +int CodedOutputStream::VarintSize32Fallback(uint32 value) { + if (value < (1 << 7)) { + return 1; + } else if (value < (1 << 14)) { + return 2; + } else if (value < (1 << 21)) { + return 3; + } else if (value < (1 << 28)) { + return 4; + } else { + return 5; + } +} + +int CodedOutputStream::VarintSize64(uint64 value) { + if (value < (1ull << 35)) { + if (value < (1ull << 7)) { + return 1; + } else if (value < (1ull << 14)) { + return 2; + } else if (value < (1ull << 21)) { + return 3; + } else if (value < (1ull << 28)) { + return 4; + } else { + return 5; + } + } else { + if (value < (1ull << 42)) { + return 6; + } else if (value < (1ull << 49)) { + return 7; + } else if (value < (1ull << 56)) { + return 8; + } else if (value < (1ull << 63)) { + return 9; + } else { + return 10; + } + } +} + +} // namespace io +} // namespace protobuf +} // namespace google diff --git a/src/google/protobuf/io/coded_stream.h b/src/google/protobuf/io/coded_stream.h new file mode 100644 index 00000000..91e5c56a --- /dev/null +++ b/src/google/protobuf/io/coded_stream.h @@ -0,0 +1,592 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. +// http://code.google.com/p/protobuf/ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// This file contains the CodedInputStream and CodedOutputStream classes, +// which wrap a ZeroCopyInputStream or ZeroCopyOutputStream, respectively, +// and allow you to read or write individual pieces of data in various +// formats. In particular, these implement the varint encoding for +// integers, a simple variable-length encoding in which smaller numbers +// take fewer bytes. +// +// Typically these classes will only be used internally by the protocol +// buffer library in order to encode and decode protocol buffers. Clients +// of the library only need to know about this class if they wish to write +// custom message parsing or serialization procedures. +// +// CodedOutputStream example: +// // Write some data to "myfile". First we write a 4-byte "magic number" +// // to identify the file type, then write a length-delimited string. The +// // string is composed of a varint giving the length followed by the raw +// // bytes. +// int fd = open("myfile", O_WRONLY); +// ZeroCopyOutputStream* raw_output = new FileOutputStream(fd); +// CodedOutputStream* coded_output = new CodedOutputStream(raw_output); +// +// int magic_number = 1234; +// char text[] = "Hello world!"; +// coded_output->WriteLittleEndian32(magic_number); +// coded_output->WriteVarint32(strlen(text)); +// coded_output->WriteRaw(text, strlen(text)); +// +// delete coded_output; +// delete raw_output; +// close(fd); +// +// CodedInputStream example: +// // Read a file created by the above code. +// int fd = open("myfile", O_RDONLY); +// ZeroCopyInputStream* raw_input = new FileInputStream(fd); +// CodedInputStream coded_input = new CodedInputStream(raw_input); +// +// coded_input->ReadLittleEndian32(&magic_number); +// if (magic_number != 1234) { +// cerr << "File not in expected format." << endl; +// return; +// } +// +// uint32 size; +// coded_input->ReadVarint32(&size); +// +// char* text = new char[size + 1]; +// coded_input->ReadRaw(buffer, size); +// text[size] = '\0'; +// +// delete coded_input; +// delete raw_input; +// close(fd); +// +// cout << "Text is: " << text << endl; +// delete [] text; +// +// For those who are interested, varint encoding is defined as follows: +// +// The encoding operates on unsigned integers of up to 64 bits in length. +// Each byte of the encoded value has the format: +// * bits 0-6: Seven bits of the number being encoded. +// * bit 7: Zero if this is the last byte in the encoding (in which +// case all remaining bits of the number are zero) or 1 if +// more bytes follow. +// The first byte contains the least-significant 7 bits of the number, the +// second byte (if present) contains the next-least-significant 7 bits, +// and so on. So, the binary number 1011000101011 would be encoded in two +// bytes as "10101011 00101100". +// +// In theory, varint could be used to encode integers of any length. +// However, for practicality we set a limit at 64 bits. The maximum encoded +// length of a number is thus 10 bytes. + +#ifndef GOOGLE_PROTOBUF_IO_CODED_STREAM_H__ +#define GOOGLE_PROTOBUF_IO_CODED_STREAM_H__ + +#include <string> +#include <google/protobuf/stubs/common.h> + +namespace google { + +namespace protobuf { +namespace io { + +// Defined in this file. +class CodedInputStream; +class CodedOutputStream; + +// Defined in other files. +class ZeroCopyInputStream; // zero_copy_stream.h +class ZeroCopyOutputStream; // zero_copy_stream.h + +// Class which reads and decodes binary data which is composed of varint- +// encoded integers and fixed-width pieces. Wraps a ZeroCopyInputStream. +// Most users will not need to deal with CodedInputStream. +// +// Most methods of CodedInputStream that return a bool return false if an +// underlying I/O error occurs or if the data is malformed. Once such a +// failure occurs, the CodedInputStream is broken and is no longer useful. +class LIBPROTOBUF_EXPORT CodedInputStream { + public: + // Create a CodedInputStream that reads from the given ZeroCopyInputStream. + explicit CodedInputStream(ZeroCopyInputStream* input); + + // Destroy the CodedInputStream and position the underlying + // ZeroCopyInputStream at the first unread byte. If an error occurred while + // reading (causing a method to return false), then the exact position of + // the input stream may be anywhere between the last value that was read + // successfully and the stream's byte limit. + ~CodedInputStream(); + + + // Skips a number of bytes. Returns false if an underlying read error + // occurs. + bool Skip(int count); + + // Read raw bytes, copying them into the given buffer. + bool ReadRaw(void* buffer, int size); + + // Like ReadRaw, but reads into a string. + // + // Implementation Note: ReadString() grows the string gradually as it + // reads in the data, rather than allocating the entire requested size + // upfront. This prevents denial-of-service attacks in which a client + // could claim that a string is going to be MAX_INT bytes long in order to + // crash the server because it can't allocate this much space at once. + bool ReadString(string* buffer, int size); + + + // Read a 32-bit little-endian integer. + bool ReadLittleEndian32(uint32* value); + // Read a 64-bit little-endian integer. + bool ReadLittleEndian64(uint64* value); + + // Read an unsigned integer with Varint encoding, truncating to 32 bits. + // Reading a 32-bit value is equivalent to reading a 64-bit one and casting + // it to uint32, but may be more efficient. + bool ReadVarint32(uint32* value); + // Read an unsigned integer with Varint encoding. + bool ReadVarint64(uint64* value); + + // Read a tag. This calls ReadVarint32() and returns the result, or returns + // zero (which is not a valid tag) if ReadVarint32() fails. Also, it updates + // the last tag value, which can be checked with LastTagWas(). + // Always inline because this is only called in once place per parse loop + // but it is called for every iteration of said loop, so it should be fast. + // GCC doesn't want to inline this by default. + uint32 ReadTag() GOOGLE_ATTRIBUTE_ALWAYS_INLINE; + + // Usually returns true if calling ReadVarint32() now would produce the given + // value. Will always return false if ReadVarint32() would not return the + // given value. If ExpectTag() returns true, it also advances past + // the varint. For best performance, use a compile-time constant as the + // parameter. + // Always inline because this collapses to a small number of instructions + // when given a constant parameter, but GCC doesn't want to inline by default. + bool ExpectTag(uint32 expected) GOOGLE_ATTRIBUTE_ALWAYS_INLINE; + + // Usually returns true if no more bytes can be read. Always returns false + // if more bytes can be read. If ExpectAtEnd() returns true, a subsequent + // call to LastTagWas() will act as if ReadTag() had been called and returned + // zero, and ConsumedEntireMessage() will return true. + bool ExpectAtEnd(); + + // If the last call to ReadTag() returned the given value, returns true. + // Otherwise, returns false; + // + // This is needed because parsers for some types of embedded messages + // (with field type TYPE_GROUP) don't actually know that they've reached the + // end of a message until they see an ENDGROUP tag, which was actually part + // of the enclosing message. The enclosing message would like to check that + // tag to make sure it had the right number, so it calls LastTagWas() on + // return from the embedded parser to check. + bool LastTagWas(uint32 expected); + + // When parsing message (but NOT a group), this method must be called + // immediately after MergeFromCodedStream() returns (if it returns true) + // to further verify that the message ended in a legitimate way. For + // example, this verifies that parsing did not end on an end-group tag. + // It also checks for some cases where, due to optimizations, + // MergeFromCodedStream() can incorrectly return true. + bool ConsumedEntireMessage(); + + // Limits ---------------------------------------------------------- + // Limits are used when parsing length-delimited embedded messages. + // After the message's length is read, PushLimit() is used to prevent + // the CodedInputStream from reading beyond that length. Once the + // embedded message has been parsed, PopLimit() is called to undo the + // limit. + + // Opaque type used with PushLimit() and PopLimit(). Do not modify + // values of this type yourself. The only reason that this isn't a + // struct with private internals is for efficiency. + typedef int Limit; + + // Places a limit on the number of bytes that the stream may read, + // starting from the current position. Once the stream hits this limit, + // it will act like the end of the input has been reached until PopLimit() + // is called. + // + // As the names imply, the stream conceptually has a stack of limits. The + // shortest limit on the stack is always enforced, even if it is not the + // top limit. + // + // The value returned by PushLimit() is opaque to the caller, and must + // be passed unchanged to the corresponding call to PopLimit(). + Limit PushLimit(int byte_limit); + + // Pops the last limit pushed by PushLimit(). The input must be the value + // returned by that call to PushLimit(). + void PopLimit(Limit limit); + + // Returns the number of bytes left until the nearest limit on the + // stack is hit, or -1 if no limits are in place. + int BytesUntilLimit(); + + // Total Bytes Limit ----------------------------------------------- + // To prevent malicious users from sending excessively large messages + // and causing integer overflows or memory exhaustion, CodedInputStream + // imposes a hard limit on the total number of bytes it will read. + + // Sets the maximum number of bytes that this CodedInputStream will read + // before refusing to continue. To prevent integer overflows in the + // protocol buffers implementation, as well as to prevent servers from + // allocating enormous amounts of memory to hold parsed messages, the + // maximum message length should be limited to the shortest length that + // will not harm usability. The theoretical shortest message that could + // cause integer overflows is 512MB. The default limit is 64MB. Apps + // should set shorter limits if possible. If warning_threshold is not -1, + // a warning will be printed to stderr after warning_threshold bytes are + // read. An error will always be printed to stderr if the limit is + // reached. + // + // This is unrelated to PushLimit()/PopLimit(). + // + // Hint: If you are reading this because your program is printing a + // warning about dangerously large protocol messages, you may be + // confused about what to do next. The best option is to change your + // design such that excessively large messages are not necessary. + // For example, try to design file formats to consist of many small + // messages rather than a single large one. If this is infeasible, + // you will need to increase the limit. Chances are, though, that + // your code never constructs a CodedInputStream on which the limit + // can be set. You probably parse messages by calling things like + // Message::ParseFromString(). In this case, you will need to change + // your code to instead construct some sort of ZeroCopyInputStream + // (e.g. an ArrayInputStream), construct a CodedInputStream around + // that, then call Message::ParseFromCodedStream() instead. Then + // you can adjust the limit. Yes, it's more work, but you're doing + // something unusual. + void SetTotalBytesLimit(int total_bytes_limit, int warning_threshold); + + // Recursion Limit ------------------------------------------------- + // To prevent corrupt or malicious messages from causing stack overflows, + // we must keep track of the depth of recursion when parsing embedded + // messages and groups. CodedInputStream keeps track of this because it + // is the only object that is passed down the stack during parsing. + + // Sets the maximum recursion depth. The default is 64. + void SetRecursionLimit(int limit); + + // Increments the current recursion depth. Returns true if the depth is + // under the limit, false if it has gone over. + bool IncrementRecursionDepth(); + + // Decrements the recursion depth. + void DecrementRecursionDepth(); + + private: + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedInputStream); + + ZeroCopyInputStream* input_; + const uint8* buffer_; + int buffer_size_; // size of current buffer + int total_bytes_read_; // total bytes read from input_, including + // the current buffer + + // If total_bytes_read_ surpasses INT_MAX, we record the extra bytes here + // so that we can BackUp() on destruction. + int overflow_bytes_; + + // LastTagWas() stuff. + uint32 last_tag_; // result of last ReadTag(). + + // This is set true by ReadVarint32Fallback() if it is called when exactly + // at EOF, or by ExpectAtEnd() when it returns true. This happens when we + // reach the end of a message and attempt to read another tag. + bool legitimate_message_end_; + + // See EnableAliasing(). + bool aliasing_enabled_; + + // Limits + Limit current_limit_; // if position = -1, no limit is applied + + // For simplicity, if the current buffer crosses a limit (either a normal + // limit created by PushLimit() or the total bytes limit), buffer_size_ + // only tracks the number of bytes before that limit. This field + // contains the number of bytes after it. Note that this implies that if + // buffer_size_ == 0 and buffer_size_after_limit_ > 0, we know we've + // hit a limit. However, if both are zero, it doesn't necessarily mean + // we aren't at a limit -- the buffer may have ended exactly at the limit. + int buffer_size_after_limit_; + + // Maximum number of bytes to read, period. This is unrelated to + // current_limit_. Set using SetTotalBytesLimit(). + int total_bytes_limit_; + int total_bytes_warning_threshold_; + + // Current recursion depth, controlled by IncrementRecursionDepth() and + // DecrementRecursionDepth(). + int recursion_depth_; + // Recursion depth limit, set by SetRecursionLimit(). + int recursion_limit_; + + // Advance the buffer by a given number of bytes. + void Advance(int amount); + + // Recomputes the value of buffer_size_after_limit_. Must be called after + // current_limit_ or total_bytes_limit_ changes. + void RecomputeBufferLimits(); + + // Writes an error message saying that we hit total_bytes_limit_. + void PrintTotalBytesLimitError(); + + // Called when the buffer runs out to request more data. Implies an + // Advance(buffer_size_). + bool Refresh(); + + bool ReadVarint32Fallback(uint32* value); +}; + +// Class which encodes and writes binary data which is composed of varint- +// encoded integers and fixed-width pieces. Wraps a ZeroCopyOutputStream. +// Most users will not need to deal with CodedOutputStream. +// +// Most methods of CodedOutputStream which return a bool return false if an +// underlying I/O error occurs. Once such a failure occurs, the +// CodedOutputStream is broken and is no longer useful. +class LIBPROTOBUF_EXPORT CodedOutputStream { + public: + // Create an CodedOutputStream that writes to the given ZeroCopyOutputStream. + explicit CodedOutputStream(ZeroCopyOutputStream* output); + + // Destroy the CodedOutputStream and position the underlying + // ZeroCopyOutputStream immediately after the last byte written. + ~CodedOutputStream(); + + // Write raw bytes, copying them from the given buffer. + bool WriteRaw(const void* buffer, int size); + + // Equivalent to WriteRaw(str.data(), str.size()). + bool WriteString(const string& str); + + + // Write a 32-bit little-endian integer. + bool WriteLittleEndian32(uint32 value); + // Write a 64-bit little-endian integer. + bool WriteLittleEndian64(uint64 value); + + // Write an unsigned integer with Varint encoding. Writing a 32-bit value + // is equivalent to casting it to uint64 and writing it as a 64-bit value, + // but may be more efficient. + bool WriteVarint32(uint32 value); + // Write an unsigned integer with Varint encoding. + bool WriteVarint64(uint64 value); + + // Equivalent to WriteVarint32() except when the value is negative, + // in which case it must be sign-extended to a full 10 bytes. + bool WriteVarint32SignExtended(int32 value); + + // This is identical to WriteVarint32(), but optimized for writing tags. + // In particular, if the input is a compile-time constant, this method + // compiles down to a couple instructions. + // Always inline because otherwise the aformentioned optimization can't work, + // but GCC by default doesn't want to inline this. + bool WriteTag(uint32 value) GOOGLE_ATTRIBUTE_ALWAYS_INLINE; + + // Returns the number of bytes needed to encode the given value as a varint. + static int VarintSize32(uint32 value); + // Returns the number of bytes needed to encode the given value as a varint. + static int VarintSize64(uint64 value); + + // If negative, 10 bytes. Otheriwse, same as VarintSize32(). + static int VarintSize32SignExtended(int32 value); + + // Returns the total number of bytes written since this object was created. + inline int ByteCount() const; + + private: + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedOutputStream); + + ZeroCopyOutputStream* output_; + uint8* buffer_; + int buffer_size_; + int total_bytes_; // Sum of sizes of all buffers seen so far. + + // Advance the buffer by a given number of bytes. + void Advance(int amount); + + // Called when the buffer runs out to request more data. Implies an + // Advance(buffer_size_). + bool Refresh(); + + bool WriteVarint32Fallback(uint32 value); + static int VarintSize32Fallback(uint32 value); +}; + +// inline methods ==================================================== +// The vast majority of varints are only one byte. These inline +// methods optimize for that case. + +inline bool CodedInputStream::ReadVarint32(uint32* value) { + if (buffer_size_ != 0 && *buffer_ < 0x80) { + *value = *buffer_; + Advance(1); + return true; + } else { + return ReadVarint32Fallback(value); + } +} + +inline uint32 CodedInputStream::ReadTag() { + if (buffer_size_ != 0 && buffer_[0] < 0x80) { + last_tag_ = buffer_[0]; + Advance(1); + return last_tag_; + } else if (buffer_size_ >= 2 && buffer_[1] < 0x80) { + last_tag_ = (buffer_[0] & 0x7f) + (buffer_[1] << 7); + Advance(2); + return last_tag_; + } else if (ReadVarint32Fallback(&last_tag_)) { + return last_tag_; + } else { + last_tag_ = 0; + return 0; + } +} + +inline bool CodedInputStream::LastTagWas(uint32 expected) { + return last_tag_ == expected; +} + +inline bool CodedInputStream::ConsumedEntireMessage() { + return legitimate_message_end_; +} + +inline bool CodedInputStream::ExpectTag(uint32 expected) { + if (expected < (1 << 7)) { + if (buffer_size_ != 0 && buffer_[0] == expected) { + Advance(1); + return true; + } else { + return false; + } + } else if (expected < (1 << 14)) { + if (buffer_size_ >= 2 && + buffer_[0] == static_cast<uint8>(expected | 0x80) && + buffer_[1] == static_cast<uint8>(expected >> 7)) { + Advance(2); + return true; + } else { + return false; + } + } else { + // Don't bother optimizing for larger values. + return false; + } +} + +inline bool CodedInputStream::ExpectAtEnd() { + // If we are at a limit we know no more bytes can be read. Otherwise, it's + // hard to say without calling Refresh(), and we'd rather not do that. + + if (buffer_size_ == 0 && buffer_size_after_limit_ != 0) { + last_tag_ = 0; // Pretend we called ReadTag()... + legitimate_message_end_ = true; // ... and it hit EOF. + return true; + } else { + return false; + } +} + +inline bool CodedOutputStream::WriteVarint32(uint32 value) { + if (value < 0x80 && buffer_size_ > 0) { + *buffer_ = value; + Advance(1); + return true; + } else { + return WriteVarint32Fallback(value); + } +} + +inline bool CodedOutputStream::WriteVarint32SignExtended(int32 value) { + if (value < 0) { + return WriteVarint64(static_cast<uint64>(value)); + } else { + return WriteVarint32(static_cast<uint32>(value)); + } +} + +inline bool CodedOutputStream::WriteTag(uint32 value) { + if (value < (1 << 7)) { + if (buffer_size_ != 0) { + buffer_[0] = value; + Advance(1); + return true; + } + } else if (value < (1 << 14)) { + if (buffer_size_ >= 2) { + buffer_[0] = static_cast<uint8>(value | 0x80); + buffer_[1] = static_cast<uint8>(value >> 7); + Advance(2); + return true; + } + } + return WriteVarint32Fallback(value); +} + +inline int CodedOutputStream::VarintSize32(uint32 value) { + if (value < (1 << 7)) { + return 1; + } else { + return VarintSize32Fallback(value); + } +} + +inline int CodedOutputStream::VarintSize32SignExtended(int32 value) { + if (value < 0) { + return 10; // TODO(kenton): Make this a symbolic constant. + } else { + return VarintSize32(static_cast<uint32>(value)); + } +} + +inline bool CodedOutputStream::WriteString(const string& str) { + return WriteRaw(str.data(), str.size()); +} + +inline int CodedOutputStream::ByteCount() const { + return total_bytes_ - buffer_size_; +} + +inline void CodedInputStream::Advance(int amount) { + buffer_ += amount; + buffer_size_ -= amount; +} + +inline void CodedOutputStream::Advance(int amount) { + buffer_ += amount; + buffer_size_ -= amount; +} + +inline void CodedInputStream::SetRecursionLimit(int limit) { + recursion_limit_ = limit; +} + +inline bool CodedInputStream::IncrementRecursionDepth() { + ++recursion_depth_; + return recursion_depth_ <= recursion_limit_; +} + +inline void CodedInputStream::DecrementRecursionDepth() { + if (recursion_depth_ > 0) --recursion_depth_; +} + +} // namespace io +} // namespace protobuf + +} // namespace google +#endif // GOOGLE_PROTOBUF_IO_CODED_STREAM_H__ diff --git a/src/google/protobuf/io/coded_stream_unittest.cc b/src/google/protobuf/io/coded_stream_unittest.cc new file mode 100644 index 00000000..c1a88349 --- /dev/null +++ b/src/google/protobuf/io/coded_stream_unittest.cc @@ -0,0 +1,929 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. +// http://code.google.com/p/protobuf/ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// This file contains tests and benchmarks. + +#include <vector> + +#include <google/protobuf/io/coded_stream.h> + +#include <limits.h> + +#include <google/protobuf/stubs/common.h> +#include <google/protobuf/testing/googletest.h> +#include <gtest/gtest.h> +#include <google/protobuf/io/zero_copy_stream_impl.h> +#include <google/protobuf/stubs/strutil.h> + + +// This declares an unsigned long long integer literal in a portable way. +// (The original macro is way too big and ruins my formatting.) +#undef ULL +#define ULL(x) GOOGLE_ULONGLONG(x) + +namespace google { +namespace protobuf { +namespace io { +namespace { + +// =================================================================== +// Data-Driven Test Infrastructure + +// TEST_1D and TEST_2D are macros I'd eventually like to see added to +// gTest. These macros can be used to declare tests which should be +// run multiple times, once for each item in some input array. TEST_1D +// tests all cases in a single input array. TEST_2D tests all +// combinations of cases from two arrays. The arrays must be statically +// defined such that the GOOGLE_ARRAYSIZE() macro works on them. Example: +// +// int kCases[] = {1, 2, 3, 4} +// TEST_1D(MyFixture, MyTest, kCases) { +// EXPECT_GT(kCases_case, 0); +// } +// +// This test iterates through the numbers 1, 2, 3, and 4 and tests that +// they are all grater than zero. In case of failure, the exact case +// which failed will be printed. The case type must be printable using +// ostream::operator<<. + +#define TEST_1D(FIXTURE, NAME, CASES) \ + class FIXTURE##_##NAME##_DD : public FIXTURE { \ + protected: \ + template <typename CaseType> \ + void DoSingleCase(const CaseType& CASES##_case); \ + }; \ + \ + TEST_F(FIXTURE##_##NAME##_DD, NAME) { \ + for (int i = 0; i < GOOGLE_ARRAYSIZE(CASES); i++) { \ + SCOPED_TRACE(testing::Message() \ + << #CASES " case #" << i << ": " << CASES[i]); \ + DoSingleCase(CASES[i]); \ + } \ + } \ + \ + template <typename CaseType> \ + void FIXTURE##_##NAME##_DD::DoSingleCase(const CaseType& CASES##_case) + +#define TEST_2D(FIXTURE, NAME, CASES1, CASES2) \ + class FIXTURE##_##NAME##_DD : public FIXTURE { \ + protected: \ + template <typename CaseType1, typename CaseType2> \ + void DoSingleCase(const CaseType1& CASES1##_case, \ + const CaseType2& CASES2##_case); \ + }; \ + \ + TEST_F(FIXTURE##_##NAME##_DD, NAME) { \ + for (int i = 0; i < GOOGLE_ARRAYSIZE(CASES1); i++) { \ + for (int j = 0; j < GOOGLE_ARRAYSIZE(CASES2); j++) { \ + SCOPED_TRACE(testing::Message() \ + << #CASES1 " case #" << i << ": " << CASES1[i] << ", " \ + << #CASES2 " case #" << j << ": " << CASES2[j]); \ + DoSingleCase(CASES1[i], CASES2[j]); \ + } \ + } \ + } \ + \ + template <typename CaseType1, typename CaseType2> \ + void FIXTURE##_##NAME##_DD::DoSingleCase(const CaseType1& CASES1##_case, \ + const CaseType2& CASES2##_case) + +// =================================================================== + +class CodedStreamTest : public testing::Test { + protected: + static const int kBufferSize = 1024 * 64; + static uint8 buffer_[kBufferSize]; +}; + +uint8 CodedStreamTest::buffer_[CodedStreamTest::kBufferSize]; + +// We test each operation over a variety of block sizes to insure that +// we test cases where reads or writes cross buffer boundaries, cases +// where they don't, and cases where there is so much buffer left that +// we can use special optimized paths that don't worry about bounds +// checks. +const int kBlockSizes[] = {1, 2, 3, 5, 7, 13, 32, 1024}; + +// ------------------------------------------------------------------- +// Varint tests. + +struct VarintCase { + uint8 bytes[10]; // Encoded bytes. + int size; // Encoded size, in bytes. + uint64 value; // Parsed value. +}; + +inline std::ostream& operator<<(std::ostream& os, const VarintCase& c) { + return os << c.value; +} + +VarintCase kVarintCases[] = { + // 32-bit values + {{0x00} , 1, 0}, + {{0x01} , 1, 1}, + {{0x7f} , 1, 127}, + {{0xa2, 0x74}, 2, (0x22 << 0) | (0x74 << 7)}, // 14882 + {{0xbe, 0xf7, 0x92, 0x84, 0x0b}, 5, // 2961488830 + (0x3e << 0) | (0x77 << 7) | (0x12 << 14) | (0x04 << 21) | + (ULL(0x0b) << 28)}, + + // 64-bit + {{0xbe, 0xf7, 0x92, 0x84, 0x1b}, 5, // 7256456126 + (0x3e << 0) | (0x77 << 7) | (0x12 << 14) | (0x04 << 21) | + (ULL(0x1b) << 28)}, + {{0x80, 0xe6, 0xeb, 0x9c, 0xc3, 0xc9, 0xa4, 0x49}, 8, // 41256202580718336 + (0x00 << 0) | (0x66 << 7) | (0x6b << 14) | (0x1c << 21) | + (ULL(0x43) << 28) | (ULL(0x49) << 35) | (ULL(0x24) << 42) | + (ULL(0x49) << 49)}, + // 11964378330978735131 + {{0x9b, 0xa8, 0xf9, 0xc2, 0xbb, 0xd6, 0x80, 0x85, 0xa6, 0x01}, 10, + (0x1b << 0) | (0x28 << 7) | (0x79 << 14) | (0x42 << 21) | + (ULL(0x3b) << 28) | (ULL(0x56) << 35) | (ULL(0x00) << 42) | + (ULL(0x05) << 49) | (ULL(0x26) << 56) | (ULL(0x01) << 63)}, +}; + +TEST_2D(CodedStreamTest, ReadVarint32, kVarintCases, kBlockSizes) { + memcpy(buffer_, kVarintCases_case.bytes, kVarintCases_case.size); + ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedInputStream coded_input(&input); + + uint32 value; + EXPECT_TRUE(coded_input.ReadVarint32(&value)); + EXPECT_EQ(static_cast<uint32>(kVarintCases_case.value), value); + } + + EXPECT_EQ(kVarintCases_case.size, input.ByteCount()); +} + +TEST_2D(CodedStreamTest, ReadTag, kVarintCases, kBlockSizes) { + memcpy(buffer_, kVarintCases_case.bytes, kVarintCases_case.size); + ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedInputStream coded_input(&input); + + uint32 expected_value = static_cast<uint32>(kVarintCases_case.value); + EXPECT_EQ(expected_value, coded_input.ReadTag()); + + EXPECT_TRUE(coded_input.LastTagWas(expected_value)); + EXPECT_FALSE(coded_input.LastTagWas(expected_value + 1)); + } + + EXPECT_EQ(kVarintCases_case.size, input.ByteCount()); +} + +TEST_1D(CodedStreamTest, ExpectTag, kVarintCases) { + // Leave one byte at the beginning of the buffer so we can read it + // to force the first buffer to be loaded. + buffer_[0] = '\0'; + memcpy(buffer_ + 1, kVarintCases_case.bytes, kVarintCases_case.size); + ArrayInputStream input(buffer_, sizeof(buffer_)); + + { + CodedInputStream coded_input(&input); + + // Read one byte to force coded_input.Refill() to be called. Otherwise, + // ExpectTag() will return a false negative. + uint8 dummy; + coded_input.ReadRaw(&dummy, 1); + EXPECT_EQ((uint)'\0', (uint)dummy); + + uint32 expected_value = static_cast<uint32>(kVarintCases_case.value); + + // ExpectTag() produces false negatives for large values. + if (kVarintCases_case.size <= 2) { + EXPECT_FALSE(coded_input.ExpectTag(expected_value + 1)); + EXPECT_TRUE(coded_input.ExpectTag(expected_value)); + } else { + EXPECT_FALSE(coded_input.ExpectTag(expected_value)); + } + } + + if (kVarintCases_case.size <= 2) { + EXPECT_EQ(kVarintCases_case.size + 1, input.ByteCount()); + } else { + EXPECT_EQ(1, input.ByteCount()); + } +} + +TEST_2D(CodedStreamTest, ReadVarint64, kVarintCases, kBlockSizes) { + memcpy(buffer_, kVarintCases_case.bytes, kVarintCases_case.size); + ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedInputStream coded_input(&input); + + uint64 value; + EXPECT_TRUE(coded_input.ReadVarint64(&value)); + EXPECT_EQ(kVarintCases_case.value, value); + } + + EXPECT_EQ(kVarintCases_case.size, input.ByteCount()); +} + +TEST_2D(CodedStreamTest, WriteVarint32, kVarintCases, kBlockSizes) { + if (kVarintCases_case.value > ULL(0x00000000FFFFFFFF)) { + // Skip this test for the 64-bit values. + return; + } + + ArrayOutputStream output(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedOutputStream coded_output(&output); + + EXPECT_TRUE(coded_output.WriteVarint32( + static_cast<uint32>(kVarintCases_case.value))); + + EXPECT_EQ(kVarintCases_case.size, coded_output.ByteCount()); + } + + EXPECT_EQ(kVarintCases_case.size, output.ByteCount()); + EXPECT_EQ(0, + memcmp(buffer_, kVarintCases_case.bytes, kVarintCases_case.size)); +} + +TEST_2D(CodedStreamTest, WriteVarint64, kVarintCases, kBlockSizes) { + ArrayOutputStream output(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedOutputStream coded_output(&output); + + EXPECT_TRUE(coded_output.WriteVarint64(kVarintCases_case.value)); + + EXPECT_EQ(kVarintCases_case.size, coded_output.ByteCount()); + } + + EXPECT_EQ(kVarintCases_case.size, output.ByteCount()); + EXPECT_EQ(0, + memcmp(buffer_, kVarintCases_case.bytes, kVarintCases_case.size)); +} + +// This test causes gcc 3.3.5 (and earlier?) to give the cryptic error: +// "sorry, unimplemented: `method_call_expr' not supported by dump_expr" +#if !defined(__GNUC__) || __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 3) + +int32 kSignExtendedVarintCases[] = { + 0, 1, -1, 1237894, -37895138 +}; + +TEST_2D(CodedStreamTest, WriteVarint32SignExtended, + kSignExtendedVarintCases, kBlockSizes) { + ArrayOutputStream output(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedOutputStream coded_output(&output); + + EXPECT_TRUE(coded_output.WriteVarint32SignExtended( + kSignExtendedVarintCases_case)); + + if (kSignExtendedVarintCases_case < 0) { + EXPECT_EQ(10, coded_output.ByteCount()); + } else { + EXPECT_LE(coded_output.ByteCount(), 5); + } + } + + if (kSignExtendedVarintCases_case < 0) { + EXPECT_EQ(10, output.ByteCount()); + } else { + EXPECT_LE(output.ByteCount(), 5); + } + + // Read value back in as a varint64 and insure it matches. + ArrayInputStream input(buffer_, sizeof(buffer_)); + + { + CodedInputStream coded_input(&input); + + uint64 value; + EXPECT_TRUE(coded_input.ReadVarint64(&value)); + + EXPECT_EQ(kSignExtendedVarintCases_case, static_cast<int64>(value)); + } + + EXPECT_EQ(output.ByteCount(), input.ByteCount()); +} + +#endif + + +// ------------------------------------------------------------------- +// Varint failure test. + +struct VarintErrorCase { + uint8 bytes[12]; + int size; + bool can_parse; +}; + +inline std::ostream& operator<<(std::ostream& os, const VarintErrorCase& c) { + return os << "size " << c.size; +} + +const VarintErrorCase kVarintErrorCases[] = { + // Control case. (Insures that there isn't something else wrong that + // makes parsing always fail.) + {{0x00}, 1, true}, + + // No input data. + {{}, 0, false}, + + // Input ends unexpectedly. + {{0xf0, 0xab}, 2, false}, + + // Input ends unexpectedly after 32 bits. + {{0xf0, 0xab, 0xc9, 0x9a, 0xf8, 0xb2}, 6, false}, + + // Longer than 10 bytes. + {{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01}, + 11, false}, +}; + +TEST_2D(CodedStreamTest, ReadVarint32Error, kVarintErrorCases, kBlockSizes) { + memcpy(buffer_, kVarintErrorCases_case.bytes, kVarintErrorCases_case.size); + ArrayInputStream input(buffer_, kVarintErrorCases_case.size, + kBlockSizes_case); + CodedInputStream coded_input(&input); + + uint32 value; + EXPECT_EQ(kVarintErrorCases_case.can_parse, coded_input.ReadVarint32(&value)); +} + +TEST_2D(CodedStreamTest, ReadVarint64Error, kVarintErrorCases, kBlockSizes) { + memcpy(buffer_, kVarintErrorCases_case.bytes, kVarintErrorCases_case.size); + ArrayInputStream input(buffer_, kVarintErrorCases_case.size, + kBlockSizes_case); + CodedInputStream coded_input(&input); + + uint64 value; + EXPECT_EQ(kVarintErrorCases_case.can_parse, coded_input.ReadVarint64(&value)); +} + +// ------------------------------------------------------------------- +// VarintSize + +struct VarintSizeCase { + uint64 value; + int size; +}; + +inline std::ostream& operator<<(std::ostream& os, const VarintSizeCase& c) { + return os << c.value; +} + +VarintSizeCase kVarintSizeCases[] = { + {0u, 1}, + {1u, 1}, + {127u, 1}, + {128u, 2}, + {758923u, 3}, + {4000000000u, 5}, + {ULL(41256202580718336), 8}, + {ULL(11964378330978735131), 10}, +}; + +TEST_1D(CodedStreamTest, VarintSize32, kVarintSizeCases) { + if (kVarintSizeCases_case.value > 0xffffffffu) { + // Skip 64-bit values. + return; + } + + EXPECT_EQ(kVarintSizeCases_case.size, + CodedOutputStream::VarintSize32( + static_cast<uint32>(kVarintSizeCases_case.value))); +} + +TEST_1D(CodedStreamTest, VarintSize64, kVarintSizeCases) { + EXPECT_EQ(kVarintSizeCases_case.size, + CodedOutputStream::VarintSize64(kVarintSizeCases_case.value)); +} + +// ------------------------------------------------------------------- +// Fixed-size int tests + +struct Fixed32Case { + uint8 bytes[sizeof(uint32)]; // Encoded bytes. + uint32 value; // Parsed value. +}; + +struct Fixed64Case { + uint8 bytes[sizeof(uint64)]; // Encoded bytes. + uint64 value; // Parsed value. +}; + +inline std::ostream& operator<<(std::ostream& os, const Fixed32Case& c) { + return os << "0x" << hex << c.value << dec; +} + +inline std::ostream& operator<<(std::ostream& os, const Fixed64Case& c) { + return os << "0x" << hex << c.value << dec; +} + +Fixed32Case kFixed32Cases[] = { + {{0xef, 0xcd, 0xab, 0x90}, 0x90abcdefu}, + {{0x12, 0x34, 0x56, 0x78}, 0x78563412u}, +}; + +Fixed64Case kFixed64Cases[] = { + {{0xef, 0xcd, 0xab, 0x90, 0x12, 0x34, 0x56, 0x78}, ULL(0x7856341290abcdef)}, + {{0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88}, ULL(0x8877665544332211)}, +}; + +TEST_2D(CodedStreamTest, ReadLittleEndian32, kFixed32Cases, kBlockSizes) { + memcpy(buffer_, kFixed32Cases_case.bytes, sizeof(kFixed32Cases_case.bytes)); + ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedInputStream coded_input(&input); + + uint32 value; + EXPECT_TRUE(coded_input.ReadLittleEndian32(&value)); + EXPECT_EQ(kFixed32Cases_case.value, value); + } + + EXPECT_EQ(sizeof(uint32), input.ByteCount()); +} + +TEST_2D(CodedStreamTest, ReadLittleEndian64, kFixed64Cases, kBlockSizes) { + memcpy(buffer_, kFixed64Cases_case.bytes, sizeof(kFixed64Cases_case.bytes)); + ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedInputStream coded_input(&input); + + uint64 value; + EXPECT_TRUE(coded_input.ReadLittleEndian64(&value)); + EXPECT_EQ(kFixed64Cases_case.value, value); + } + + EXPECT_EQ(sizeof(uint64), input.ByteCount()); +} + +TEST_2D(CodedStreamTest, WriteLittleEndian32, kFixed32Cases, kBlockSizes) { + ArrayOutputStream output(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedOutputStream coded_output(&output); + + EXPECT_TRUE(coded_output.WriteLittleEndian32(kFixed32Cases_case.value)); + + EXPECT_EQ(sizeof(uint32), coded_output.ByteCount()); + } + + EXPECT_EQ(sizeof(uint32), output.ByteCount()); + EXPECT_EQ(0, memcmp(buffer_, kFixed32Cases_case.bytes, sizeof(uint32))); +} + +TEST_2D(CodedStreamTest, WriteLittleEndian64, kFixed64Cases, kBlockSizes) { + ArrayOutputStream output(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedOutputStream coded_output(&output); + + EXPECT_TRUE(coded_output.WriteLittleEndian64(kFixed64Cases_case.value)); + + EXPECT_EQ(sizeof(uint64), coded_output.ByteCount()); + } + + EXPECT_EQ(sizeof(uint64), output.ByteCount()); + EXPECT_EQ(0, memcmp(buffer_, kFixed64Cases_case.bytes, sizeof(uint64))); +} + +// ------------------------------------------------------------------- +// Raw reads and writes + +const char kRawBytes[] = "Some bytes which will be writted and read raw."; + +TEST_1D(CodedStreamTest, ReadRaw, kBlockSizes) { + memcpy(buffer_, kRawBytes, sizeof(kRawBytes)); + ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case); + char read_buffer[sizeof(kRawBytes)]; + + { + CodedInputStream coded_input(&input); + + EXPECT_TRUE(coded_input.ReadRaw(read_buffer, sizeof(kRawBytes))); + EXPECT_EQ(0, memcmp(kRawBytes, read_buffer, sizeof(kRawBytes))); + } + + EXPECT_EQ(sizeof(kRawBytes), input.ByteCount()); +} + +TEST_1D(CodedStreamTest, WriteRaw, kBlockSizes) { + ArrayOutputStream output(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedOutputStream coded_output(&output); + + EXPECT_TRUE(coded_output.WriteRaw(kRawBytes, sizeof(kRawBytes))); + + EXPECT_EQ(sizeof(kRawBytes), coded_output.ByteCount()); + } + + EXPECT_EQ(sizeof(kRawBytes), output.ByteCount()); + EXPECT_EQ(0, memcmp(buffer_, kRawBytes, sizeof(kRawBytes))); +} + +TEST_1D(CodedStreamTest, ReadString, kBlockSizes) { + memcpy(buffer_, kRawBytes, sizeof(kRawBytes)); + ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedInputStream coded_input(&input); + + string str; + EXPECT_TRUE(coded_input.ReadString(&str, strlen(kRawBytes))); + EXPECT_EQ(kRawBytes, str); + } + + EXPECT_EQ(strlen(kRawBytes), input.ByteCount()); +} + +// Check to make sure ReadString doesn't crash on impossibly large strings. +TEST_1D(CodedStreamTest, ReadStringImpossiblyLarge, kBlockSizes) { + ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedInputStream coded_input(&input); + + string str; + // Try to read a gigabyte. + EXPECT_FALSE(coded_input.ReadString(&str, 1 << 30)); + } +} + + +// ------------------------------------------------------------------- +// Skip + +const char kSkipTestBytes[] = + "<Before skipping><To be skipped><After skipping>"; +const char kSkipOutputTestBytes[] = + "-----------------<To be skipped>----------------"; + +TEST_1D(CodedStreamTest, SkipInput, kBlockSizes) { + memcpy(buffer_, kSkipTestBytes, sizeof(kSkipTestBytes)); + ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedInputStream coded_input(&input); + + string str; + EXPECT_TRUE(coded_input.ReadString(&str, strlen("<Before skipping>"))); + EXPECT_EQ("<Before skipping>", str); + EXPECT_TRUE(coded_input.Skip(strlen("<To be skipped>"))); + EXPECT_TRUE(coded_input.ReadString(&str, strlen("<After skipping>"))); + EXPECT_EQ("<After skipping>", str); + } + + EXPECT_EQ(strlen(kSkipTestBytes), input.ByteCount()); +} + +// ------------------------------------------------------------------- +// Limits + +TEST_1D(CodedStreamTest, BasicLimit, kBlockSizes) { + ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedInputStream coded_input(&input); + + EXPECT_EQ(-1, coded_input.BytesUntilLimit()); + CodedInputStream::Limit limit = coded_input.PushLimit(8); + + // Read until we hit the limit. + uint32 value; + EXPECT_EQ(8, coded_input.BytesUntilLimit()); + EXPECT_TRUE(coded_input.ReadLittleEndian32(&value)); + EXPECT_EQ(4, coded_input.BytesUntilLimit()); + EXPECT_TRUE(coded_input.ReadLittleEndian32(&value)); + EXPECT_EQ(0, coded_input.BytesUntilLimit()); + EXPECT_FALSE(coded_input.ReadLittleEndian32(&value)); + EXPECT_EQ(0, coded_input.BytesUntilLimit()); + + coded_input.PopLimit(limit); + + EXPECT_EQ(-1, coded_input.BytesUntilLimit()); + EXPECT_TRUE(coded_input.ReadLittleEndian32(&value)); + } + + EXPECT_EQ(12, input.ByteCount()); +} + +// Test what happens when we push two limits where the second (top) one is +// shorter. +TEST_1D(CodedStreamTest, SmallLimitOnTopOfBigLimit, kBlockSizes) { + ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedInputStream coded_input(&input); + + EXPECT_EQ(-1, coded_input.BytesUntilLimit()); + CodedInputStream::Limit limit1 = coded_input.PushLimit(8); + EXPECT_EQ(8, coded_input.BytesUntilLimit()); + CodedInputStream::Limit limit2 = coded_input.PushLimit(4); + + uint32 value; + + // Read until we hit limit2, the top and shortest limit. + EXPECT_EQ(4, coded_input.BytesUntilLimit()); + EXPECT_TRUE(coded_input.ReadLittleEndian32(&value)); + EXPECT_EQ(0, coded_input.BytesUntilLimit()); + EXPECT_FALSE(coded_input.ReadLittleEndian32(&value)); + EXPECT_EQ(0, coded_input.BytesUntilLimit()); + + coded_input.PopLimit(limit2); + + // Read until we hit limit1. + EXPECT_EQ(4, coded_input.BytesUntilLimit()); + EXPECT_TRUE(coded_input.ReadLittleEndian32(&value)); + EXPECT_EQ(0, coded_input.BytesUntilLimit()); + EXPECT_FALSE(coded_input.ReadLittleEndian32(&value)); + EXPECT_EQ(0, coded_input.BytesUntilLimit()); + + coded_input.PopLimit(limit1); + + // No more limits. + EXPECT_EQ(-1, coded_input.BytesUntilLimit()); + EXPECT_TRUE(coded_input.ReadLittleEndian32(&value)); + } + + EXPECT_EQ(12, input.ByteCount()); +} + +// Test what happens when we push two limits where the second (top) one is +// longer. In this case, the top limit is shortened to match the previous +// limit. +TEST_1D(CodedStreamTest, BigLimitOnTopOfSmallLimit, kBlockSizes) { + ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case); + + { + CodedInputStream coded_input(&input); + + EXPECT_EQ(-1, coded_input.BytesUntilLimit()); + CodedInputStream::Limit limit1 = coded_input.PushLimit(4); + EXPECT_EQ(4, coded_input.BytesUntilLimit()); + CodedInputStream::Limit limit2 = coded_input.PushLimit(8); + + uint32 value; + + // Read until we hit limit2. Except, wait! limit1 is shorter, so + // we end up hitting that first, despite having 4 bytes to go on + // limit2. + EXPECT_EQ(4, coded_input.BytesUntilLimit()); + EXPECT_TRUE(coded_input.ReadLittleEndian32(&value)); + EXPECT_EQ(0, coded_input.BytesUntilLimit()); + EXPECT_FALSE(coded_input.ReadLittleEndian32(&value)); + EXPECT_EQ(0, coded_input.BytesUntilLimit()); + + coded_input.PopLimit(limit2); + + // OK, popped limit2, now limit1 is on top, which we've already hit. + EXPECT_EQ(0, coded_input.BytesUntilLimit()); + EXPECT_FALSE(coded_input.ReadLittleEndian32(&value)); + EXPECT_EQ(0, coded_input.BytesUntilLimit()); + + coded_input.PopLimit(limit1); + + // No more limits. + EXPECT_EQ(-1, coded_input.BytesUntilLimit()); + EXPECT_TRUE(coded_input.ReadLittleEndian32(&value)); + } + + EXPECT_EQ(8, input.ByteCount()); +} + +TEST_F(CodedStreamTest, ExpectAtEnd) { + // Test ExpectAtEnd(), which is based on limits. + ArrayInputStream input(buffer_, sizeof(buffer_)); + CodedInputStream coded_input(&input); + + EXPECT_FALSE(coded_input.ExpectAtEnd()); + + CodedInputStream::Limit limit = coded_input.PushLimit(4); + + uint32 value; + EXPECT_TRUE(coded_input.ReadLittleEndian32(&value)); + EXPECT_TRUE(coded_input.ExpectAtEnd()); + + coded_input.PopLimit(limit); + EXPECT_FALSE(coded_input.ExpectAtEnd()); +} + +TEST_F(CodedStreamTest, NegativeLimit) { + // Check what happens when we push a negative limit. + ArrayInputStream input(buffer_, sizeof(buffer_)); + CodedInputStream coded_input(&input); + + CodedInputStream::Limit limit = coded_input.PushLimit(-1234); + // BytesUntilLimit() returns -1 to mean "no limit", which actually means + // "the limit is INT_MAX relative to the beginning of the stream". + EXPECT_EQ(-1, coded_input.BytesUntilLimit()); + coded_input.PopLimit(limit); +} + +TEST_F(CodedStreamTest, NegativeLimitAfterReading) { + // Check what happens when we push a negative limit. + ArrayInputStream input(buffer_, sizeof(buffer_)); + CodedInputStream coded_input(&input); + ASSERT_TRUE(coded_input.Skip(128)); + + CodedInputStream::Limit limit = coded_input.PushLimit(-64); + // BytesUntilLimit() returns -1 to mean "no limit", which actually means + // "the limit is INT_MAX relative to the beginning of the stream". + EXPECT_EQ(-1, coded_input.BytesUntilLimit()); + coded_input.PopLimit(limit); +} + +TEST_F(CodedStreamTest, OverflowLimit) { + // Check what happens when we push a limit large enough that its absolute + // position is more than 2GB into the stream. + ArrayInputStream input(buffer_, sizeof(buffer_)); + CodedInputStream coded_input(&input); + ASSERT_TRUE(coded_input.Skip(128)); + + CodedInputStream::Limit limit = coded_input.PushLimit(INT_MAX); + // BytesUntilLimit() returns -1 to mean "no limit", which actually means + // "the limit is INT_MAX relative to the beginning of the stream". + EXPECT_EQ(-1, coded_input.BytesUntilLimit()); + coded_input.PopLimit(limit); +} + +TEST_F(CodedStreamTest, TotalBytesLimit) { + ArrayInputStream input(buffer_, sizeof(buffer_)); + CodedInputStream coded_input(&input); + coded_input.SetTotalBytesLimit(16, -1); + + string str; + EXPECT_TRUE(coded_input.ReadString(&str, 16)); + + vector<string> errors; + + { + ScopedMemoryLog error_log; + EXPECT_FALSE(coded_input.ReadString(&str, 1)); + errors = error_log.GetMessages(ERROR); + } + + ASSERT_EQ(1, errors.size()); + EXPECT_PRED_FORMAT2(testing::IsSubstring, + "A protocol message was rejected because it was too big", errors[0]); + + coded_input.SetTotalBytesLimit(32, -1); + EXPECT_TRUE(coded_input.ReadString(&str, 16)); +} + +TEST_F(CodedStreamTest, TotalBytesLimitNotValidMessageEnd) { + // total_bytes_limit_ is not a valid place for a message to end. + + ArrayInputStream input(buffer_, sizeof(buffer_)); + CodedInputStream coded_input(&input); + + // Set both total_bytes_limit and a regular limit at 16 bytes. + coded_input.SetTotalBytesLimit(16, -1); + CodedInputStream::Limit limit = coded_input.PushLimit(16); + + // Read 16 bytes. + string str; + EXPECT_TRUE(coded_input.ReadString(&str, 16)); + + // Read a tag. Should fail, but report being a valid endpoint since it's + // a regular limit. + EXPECT_EQ(0, coded_input.ReadTag()); + EXPECT_TRUE(coded_input.ConsumedEntireMessage()); + + // Pop the limit. + coded_input.PopLimit(limit); + + // Read a tag. Should fail, and report *not* being a valid endpoint, since + // this time we're hitting the total bytes limit. + EXPECT_EQ(0, coded_input.ReadTag()); + EXPECT_FALSE(coded_input.ConsumedEntireMessage()); +} + +TEST_F(CodedStreamTest, RecursionLimit) { + ArrayInputStream input(buffer_, sizeof(buffer_)); + CodedInputStream coded_input(&input); + coded_input.SetRecursionLimit(4); + + // This is way too much testing for a counter. + EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 1 + EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 2 + EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 3 + EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 4 + EXPECT_FALSE(coded_input.IncrementRecursionDepth()); // 5 + EXPECT_FALSE(coded_input.IncrementRecursionDepth()); // 6 + coded_input.DecrementRecursionDepth(); // 5 + EXPECT_FALSE(coded_input.IncrementRecursionDepth()); // 6 + coded_input.DecrementRecursionDepth(); // 5 + coded_input.DecrementRecursionDepth(); // 4 + coded_input.DecrementRecursionDepth(); // 3 + EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 4 + EXPECT_FALSE(coded_input.IncrementRecursionDepth()); // 5 + coded_input.DecrementRecursionDepth(); // 4 + coded_input.DecrementRecursionDepth(); // 3 + coded_input.DecrementRecursionDepth(); // 2 + coded_input.DecrementRecursionDepth(); // 1 + coded_input.DecrementRecursionDepth(); // 0 + coded_input.DecrementRecursionDepth(); // 0 + coded_input.DecrementRecursionDepth(); // 0 + EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 1 + EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 2 + EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 3 + EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 4 + EXPECT_FALSE(coded_input.IncrementRecursionDepth()); // 5 + + coded_input.SetRecursionLimit(6); + EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 6 + EXPECT_FALSE(coded_input.IncrementRecursionDepth()); // 7 +} + +class ReallyBigInputStream : public ZeroCopyInputStream { + public: + ReallyBigInputStream() : backup_amount_(0), buffer_count_(0) {} + ~ReallyBigInputStream() {} + + // implements ZeroCopyInputStream ---------------------------------- + bool Next(const void** data, int* size) { + // We only expect BackUp() to be called at the end. + EXPECT_EQ(0, backup_amount_); + + switch (buffer_count_++) { + case 0: + *data = buffer_; + *size = sizeof(buffer_); + return true; + case 1: + // Return an enormously large buffer that, when combined with the 1k + // returned already, should overflow the total_bytes_read_ counter in + // CodedInputStream. Note that we'll only read the first 1024 bytes + // of this buffer so it's OK that we have it point at buffer_. + *data = buffer_; + *size = INT_MAX; + return true; + default: + return false; + } + } + + void BackUp(int count) { + backup_amount_ = count; + } + + bool Skip(int count) { GOOGLE_LOG(FATAL) << "Not implemented."; return false; } + int64 ByteCount() const { GOOGLE_LOG(FATAL) << "Not implemented."; return 0; } + + int backup_amount_; + + private: + char buffer_[1024]; + int64 buffer_count_; +}; + +TEST_F(CodedStreamTest, InputOver2G) { + // CodedInputStream should gracefully handle input over 2G and call + // input.BackUp() with the correct number of bytes on destruction. + ReallyBigInputStream input; + + vector<string> errors; + + { + ScopedMemoryLog error_log; + CodedInputStream coded_input(&input); + string str; + EXPECT_TRUE(coded_input.ReadString(&str, 512)); + EXPECT_TRUE(coded_input.ReadString(&str, 1024)); + errors = error_log.GetMessages(ERROR); + } + + EXPECT_EQ(INT_MAX - 512, input.backup_amount_); + EXPECT_EQ(0, errors.size()); +} + +// =================================================================== + + +} // namespace +} // namespace io +} // namespace protobuf +} // namespace google diff --git a/src/google/protobuf/io/package_info.h b/src/google/protobuf/io/package_info.h new file mode 100644 index 00000000..8272d51d --- /dev/null +++ b/src/google/protobuf/io/package_info.h @@ -0,0 +1,40 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. +// http://code.google.com/p/protobuf/ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// This file exists solely to document the google::protobuf::io namespace. +// It is not compiled into anything, but it may be read by an automated +// documentation generator. + +namespace google { + +namespace protobuf { + +// Auxiliary classes used for I/O. +// +// The Protocol Buffer library uses the classes in this package to deal with +// I/O and encoding/decoding raw bytes. Most users will not need to +// deal with this package. However, users who want to adapt the system to +// work with their own I/O abstractions -- e.g., to allow Protocol Buffers +// to be read from a different kind of input stream without the need for a +// temporary buffer -- should take a closer look. +namespace io {} + +} // namespace protobuf +} // namespace google diff --git a/src/google/protobuf/io/printer.cc b/src/google/protobuf/io/printer.cc new file mode 100644 index 00000000..7b3e3de4 --- /dev/null +++ b/src/google/protobuf/io/printer.cc @@ -0,0 +1,165 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. +// http://code.google.com/p/protobuf/ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. + +#include <google/protobuf/io/printer.h> +#include <google/protobuf/io/zero_copy_stream.h> +#include <google/protobuf/stubs/common.h> +#include <google/protobuf/stubs/strutil.h> + +namespace google { +namespace protobuf { +namespace io { + +Printer::Printer(ZeroCopyOutputStream* output, char variable_delimiter) + : variable_delimiter_(variable_delimiter), + output_(output), + buffer_(NULL), + buffer_size_(0), + at_start_of_line_(true), + failed_(false) { +} + +Printer::~Printer() { + // Only BackUp() if we're sure we've successfully called Next() at least once. + if (buffer_size_ > 0) { + output_->BackUp(buffer_size_); + } +} + +void Printer::Print(const map<string, string>& variables, const char* text) { + int size = strlen(text); + int pos = 0; // The number of bytes we've written so far. + + for (int i = 0; i < size; i++) { + if (text[i] == '\n') { + // Saw newline. If there is more text, we may need to insert an indent + // here. So, write what we have so far, including the '\n'. + Write(text + pos, i - pos + 1); + pos = i + 1; + + // Setting this true will cause the next Write() to insert an indent + // first. + at_start_of_line_ = true; + + } else if (text[i] == variable_delimiter_) { + // Saw the start of a variable name. + + // Write what we have so far. + Write(text + pos, i - pos); + pos = i + 1; + + // Find closing delimiter. + const char* end = strchr(text + pos, variable_delimiter_); + if (end == NULL) { + GOOGLE_LOG(DFATAL) << " Unclosed variable name."; + end = text + pos; + } + int endpos = end - text; + + string varname(text + pos, endpos - pos); + if (varname.empty()) { + // Two delimiters in a row reduce to a literal delimiter character. + Write(&variable_delimiter_, 1); + } else { + // Replace with the variable's value. + map<string, string>::const_iterator iter = variables.find(varname); + if (iter == variables.end()) { + GOOGLE_LOG(DFATAL) << " Undefined variable: " << varname; + } else { + Write(iter->second.data(), iter->second.size()); + } + } + + // Advance past this variable. + i = endpos; + pos = endpos + 1; + } + } + + // Write the rest. + Write(text + pos, size - pos); +} + +void Printer::Print(const char* text) { + static map<string, string> empty; + Print(empty, text); +} + +void Printer::Print(const char* text, + const char* variable, const string& value) { + map<string, string> vars; + vars[variable] = value; + Print(vars, text); +} + +void Printer::Print(const char* text, + const char* variable1, const string& value1, + const char* variable2, const string& value2) { + map<string, string> vars; + vars[variable1] = value1; + vars[variable2] = value2; + Print(vars, text); +} + +void Printer::Indent() { + indent_ += " "; +} + +void Printer::Outdent() { + if (indent_.empty()) { + GOOGLE_LOG(DFATAL) << " Outdent() without matching Indent()."; + return; + } + + indent_.resize(indent_.size() - 2); +} + +void Printer::Write(const char* data, int size) { + if (failed_) return; + if (size == 0) return; + + if (at_start_of_line_) { + // Insert an indent. + at_start_of_line_ = false; + Write(indent_.data(), indent_.size()); + if (failed_) return; + } + + while (size > buffer_size_) { + // Data exceeds space in the buffer. Copy what we can and request a + // new buffer. + memcpy(buffer_, data, buffer_size_); + data += buffer_size_; + size -= buffer_size_; + void* void_buffer; + failed_ = !output_->Next(&void_buffer, &buffer_size_); + if (failed_) return; + buffer_ = reinterpret_cast<char*>(void_buffer); + } + + // Buffer is big enough to receive the data; copy it. + memcpy(buffer_, data, size); + buffer_ += size; + buffer_size_ -= size; +} + +} // namespace io +} // namespace protobuf +} // namespace google diff --git a/src/google/protobuf/io/printer.h b/src/google/protobuf/io/printer.h new file mode 100644 index 00000000..ee8f46c8 --- /dev/null +++ b/src/google/protobuf/io/printer.h @@ -0,0 +1,109 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. +// http://code.google.com/p/protobuf/ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// Utility class for writing text to a ZeroCopyOutputStream. + +#ifndef GOOGLE_PROTOBUF_IO_PRINTER_H__ +#define GOOGLE_PROTOBUF_IO_PRINTER_H__ + +#include <string> +#include <map> +#include <google/protobuf/stubs/common.h> + +namespace google { +namespace protobuf { +namespace io { + +class ZeroCopyOutputStream; // zero_copy_stream.h + +// This simple utility class assists in code generation. It basically +// allows the caller to define a set of variables and then output some +// text with variable substitutions. Example usage: +// +// Printer printer(output, '$'); +// map<string, string> vars; +// vars["name"] = "Bob"; +// printer.Print(vars, "My name is $name$."); +// +// The above writes "My name is Bob." to the output stream. +// +// Printer aggressively enforces correct usage, crashing (with assert failures) +// in the case of undefined variables. This helps greatly in debugging code +// which uses it. This class is not intended to be used by production servers. +class LIBPROTOBUF_EXPORT Printer { + public: + // Create a printer that writes text to the given output stream. Use the + // given character as the delimiter for variables. + Printer(ZeroCopyOutputStream* output, char variable_delimiter); + ~Printer(); + + // Print some text after applying variable substitutions. If a particular + // variable in the text is not defined, this will crash. Variables to be + // substituted are identified by their names surrounded by delimiter + // characters (as given to the constructor). The variable bindings are + // defined by the given map. + void Print(const map<string, string>& variables, const char* text); + + // Like the first Print(), except the substitutions are given as parameters. + void Print(const char* text); + // Like the first Print(), except the substitutions are given as parameters. + void Print(const char* text, const char* variable, const string& value); + // Like the first Print(), except the substitutions are given as parameters. + void Print(const char* text, const char* variable1, const string& value1, + const char* variable2, const string& value2); + // TODO(kenton): Overloaded versions with more variables? Two seems + // to be enough. + + // Indent text by two spaces. After calling Indent(), two spaces will be + // inserted at the beginning of each line of text. Indent() may be called + // multiple times to produce deeper indents. + void Indent(); + + // Reduces the current indent level by two spaces, or crashes if the indent + // level is zero. + void Outdent(); + + // True if any write to the underlying stream failed. (We don't just + // crash in this case because this is an I/O failure, not a programming + // error.) + bool failed() const { return failed_; } + + private: + // Write some text to the output buffer. + void Write(const char* data, int size); + + const char variable_delimiter_; + + ZeroCopyOutputStream* const output_; + char* buffer_; + int buffer_size_; + + string indent_; + bool at_start_of_line_; + bool failed_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Printer); +}; + +} // namespace io +} // namespace protobuf + +} // namespace google +#endif // GOOGLE_PROTOBUF_IO_PRINTER_H__ diff --git a/src/google/protobuf/io/printer_unittest.cc b/src/google/protobuf/io/printer_unittest.cc new file mode 100644 index 00000000..652728bc --- /dev/null +++ b/src/google/protobuf/io/printer_unittest.cc @@ -0,0 +1,210 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. +// http://code.google.com/p/protobuf/ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. + +#include <vector> + +#include <google/protobuf/io/printer.h> +#include <google/protobuf/io/zero_copy_stream_impl.h> + +#include <google/protobuf/stubs/common.h> +#include <google/protobuf/testing/googletest.h> +#include <gtest/gtest.h> + +namespace google { +namespace protobuf { +namespace io { +namespace { + +// Each test repeats over several block sizes in order to test both cases +// where particular writes cross a buffer boundary and cases where they do +// not. + +TEST(Printer, EmptyPrinter) { + char buffer[8192]; + const int block_size = 100; + ArrayOutputStream output(buffer, GOOGLE_ARRAYSIZE(buffer), block_size); + Printer printer(&output, '\0'); + EXPECT_TRUE(!printer.failed()); +} + +TEST(Printer, BasicPrinting) { + char buffer[8192]; + + for (int block_size = 1; block_size < 512; block_size *= 2) { + ArrayOutputStream output(buffer, sizeof(buffer), block_size); + + { + Printer printer(&output, '\0'); + + printer.Print("Hello World!"); + printer.Print(" This is the same line.\n"); + printer.Print("But this is a new one.\nAnd this is another one."); + + EXPECT_FALSE(printer.failed()); + } + + buffer[output.ByteCount()] = '\0'; + + EXPECT_STREQ(buffer, + "Hello World! This is the same line.\n" + "But this is a new one.\n" + "And this is another one."); + } +} + +TEST(Printer, VariableSubstitution) { + char buffer[8192]; + + for (int block_size = 1; block_size < 512; block_size *= 2) { + ArrayOutputStream output(buffer, sizeof(buffer), block_size); + + { + Printer printer(&output, '$'); + map<string, string> vars; + + vars["foo"] = "World"; + vars["bar"] = "$foo$"; + vars["abcdefg"] = "1234"; + + printer.Print(vars, "Hello $foo$!\nbar = $bar$\n"); + printer.Print(vars, "$abcdefg$\nA literal dollar sign: $$"); + + vars["foo"] = "blah"; + printer.Print(vars, "\nNow foo = $foo$."); + + EXPECT_FALSE(printer.failed()); + } + + buffer[output.ByteCount()] = '\0'; + + EXPECT_STREQ(buffer, + "Hello World!\n" + "bar = $foo$\n" + "1234\n" + "A literal dollar sign: $\n" + "Now foo = blah."); + } +} + +TEST(Printer, InlineVariableSubstitution) { + char buffer[8192]; + + ArrayOutputStream output(buffer, sizeof(buffer)); + + { + Printer printer(&output, '$'); + printer.Print("Hello $foo$!\n", "foo", "World"); + printer.Print("$foo$ $bar$\n", "foo", "one", "bar", "two"); + EXPECT_FALSE(printer.failed()); + } + + buffer[output.ByteCount()] = '\0'; + + EXPECT_STREQ(buffer, + "Hello World!\n" + "one two\n"); +} + +TEST(Printer, Indenting) { + char buffer[8192]; + + for (int block_size = 1; block_size < 512; block_size *= 2) { + ArrayOutputStream output(buffer, sizeof(buffer), block_size); + + { + Printer printer(&output, '$'); + map<string, string> vars; + + vars["newline"] = "\n"; + + printer.Print("This is not indented.\n"); + printer.Indent(); + printer.Print("This is indented\nAnd so is this\n"); + printer.Outdent(); + printer.Print("But this is not."); + printer.Indent(); + printer.Print(" And this is still the same line.\n" + "But this is indented.\n"); + printer.Print(vars, "Note that a newline in a variable will break " + "indenting, as we see$newline$here.\n"); + printer.Indent(); + printer.Print("And this"); + printer.Outdent(); + printer.Outdent(); + printer.Print(" is double-indented\nBack to normal."); + + EXPECT_FALSE(printer.failed()); + } + + buffer[output.ByteCount()] = '\0'; + + EXPECT_STREQ(buffer, + "This is not indented.\n" + " This is indented\n" + " And so is this\n" + "But this is not. And this is still the same line.\n" + " But this is indented.\n" + " Note that a newline in a variable will break indenting, as we see\n" + "here.\n" + " And this is double-indented\n" + "Back to normal."); + } +} + +// Death tests do not work on Windows as of yet. +#ifdef GTEST_HAS_DEATH_TEST +TEST(Printer, Death) { + char buffer[8192]; + + ArrayOutputStream output(buffer, sizeof(buffer)); + Printer printer(&output, '$'); + + EXPECT_DEBUG_DEATH(printer.Print("$nosuchvar$"), "Undefined variable"); + EXPECT_DEBUG_DEATH(printer.Print("$unclosed"), "Unclosed variable name"); + EXPECT_DEBUG_DEATH(printer.Outdent(), "without matching Indent"); +} +#endif // GTEST_HAS_DEATH_TEST + +TEST(Printer, WriteFailure) { + char buffer[16]; + + ArrayOutputStream output(buffer, sizeof(buffer)); + Printer printer(&output, '$'); + + // Print 16 bytes to fill the buffer exactly (should not fail). + printer.Print("0123456789abcdef"); + EXPECT_FALSE(printer.failed()); + + // Try to print one more byte (should fail). + printer.Print(" "); + EXPECT_TRUE(printer.failed()); + + // Should not crash + printer.Print("blah"); + EXPECT_TRUE(printer.failed()); + + // Buffer should contain the first 16 bytes written. + EXPECT_EQ("0123456789abcdef", string(buffer, sizeof(buffer))); +} + +} // namespace +} // namespace io +} // namespace protobuf +} // namespace google diff --git a/src/google/protobuf/io/tokenizer.cc b/src/google/protobuf/io/tokenizer.cc new file mode 100644 index 00000000..3864fcfb --- /dev/null +++ b/src/google/protobuf/io/tokenizer.cc @@ -0,0 +1,679 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. +// http://code.google.com/p/protobuf/ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// Here we have a hand-written lexer. At first you might ask yourself, +// "Hand-written text processing? Is Kenton crazy?!" Well, first of all, +// yes I am crazy, but that's beside the point. There are actually reasons +// why I ended up writing this this way. +// +// The traditional approach to lexing is to use lex to generate a lexer for +// you. Unfortunately, lex's output is ridiculously ugly and difficult to +// integrate cleanly with C++ code, especially abstract code or code meant +// as a library. Better parser-generators exist but would add dependencies +// which most users won't already have, which we'd like to avoid. (GNU flex +// has a C++ output option, but it's still ridiculously ugly, non-abstract, +// and not library-friendly.) +// +// The next approach that any good software engineer should look at is to +// use regular expressions. And, indeed, I did. I have code which +// implements this same class using regular expressions. It's about 200 +// lines shorter. However: +// - Rather than error messages telling you "This string has an invalid +// escape sequence at line 5, column 45", you get error messages like +// "Parse error on line 5". Giving more precise errors requires adding +// a lot of code that ends up basically as complex as the hand-coded +// version anyway. +// - The regular expression to match a string literal looks like this: +// kString = new RE("(\"([^\"\\\\]|" // non-escaped +// "\\\\[abfnrtv?\"'\\\\0-7]|" // normal escape +// "\\\\x[0-9a-fA-F])*\"|" // hex escape +// "\'([^\'\\\\]|" // Also support single-quotes. +// "\\\\[abfnrtv?\"'\\\\0-7]|" +// "\\\\x[0-9a-fA-F])*\')"); +// Verifying the correctness of this line noise is actually harder than +// verifying the correctness of ConsumeString(), defined below. I'm not +// even confident that the above is correct, after staring at it for some +// time. +// - PCRE is fast, but there's still more overhead involved than the code +// below. +// - Sadly, regular expressions are not part of the C standard library, so +// using them would require depending on some other library. For the +// open source release, this could be really annoying. Nobody likes +// downloading one piece of software just to find that they need to +// download something else to make it work, and in all likelihood +// people downloading Protocol Buffers will already be doing so just +// to make something else work. We could include a copy of PCRE with +// our code, but that obligates us to keep it up-to-date and just seems +// like a big waste just to save 200 lines of code. +// +// On a similar but unrelated note, I'm even scared to use ctype.h. +// Apparently functions like isalpha() are locale-dependent. So, if we used +// that, then if this code is being called from some program that doesn't +// have its locale set to "C", it would behave strangely. We can't just set +// the locale to "C" ourselves since we might break the calling program that +// way, particularly if it is multi-threaded. WTF? Someone please let me +// (Kenton) know if I'm missing something here... +// +// I'd love to hear about other alternatives, though, as this code isn't +// exactly pretty. + +#include <google/protobuf/io/tokenizer.h> +#include <google/protobuf/io/zero_copy_stream.h> +#include <google/protobuf/stubs/strutil.h> + +namespace google { +namespace protobuf { +namespace io { +namespace { + +// As mentioned above, I don't trust ctype.h due to the presence of "locales". +// So, I have written replacement functions here. Someone please smack me if +// this is a bad idea or if there is some way around this. +// +// These "character classes" are designed to be used in template methods. +// For instance, Tokenizer::ConsumeZeroOrMore<Whitespace>() will eat +// whitespace. + +// Note: No class is allowed to contain '\0', since this is used to mark end- +// of-input and is handled specially. + +#define CHARACTER_CLASS(NAME, EXPRESSION) \ + class NAME { \ + public: \ + static inline bool InClass(char c) { \ + return EXPRESSION; \ + } \ + } + +CHARACTER_CLASS(Whitespace, c == ' ' || c == '\n' || c == '\t' || + c == '\r' || c == '\v'); + +CHARACTER_CLASS(Unprintable, c < ' ' && c != '\0'); + +CHARACTER_CLASS(Digit, '0' <= c && c <= '9'); +CHARACTER_CLASS(OctalDigit, '0' <= c && c <= '7'); +CHARACTER_CLASS(HexDigit, ('0' <= c && c <= '9') || + ('a' <= c && c <= 'f') || + ('A' <= c && c <= 'F')); + +CHARACTER_CLASS(Letter, ('a' <= c && c <= 'z') || + ('A' <= c && c <= 'Z') || + (c == '_')); + +CHARACTER_CLASS(Alphanumeric, ('a' <= c && c <= 'z') || + ('A' <= c && c <= 'Z') || + ('0' <= c && c <= '9') || + (c == '_')); + +CHARACTER_CLASS(Escape, c == 'a' || c == 'b' || c == 'f' || c == 'n' || + c == 'r' || c == 't' || c == 'v' || c == '\\' || + c == '?' || c == '\'' || c == '\"'); + +#undef CHARACTER_CLASS + +// Given a char, interpret it as a numeric digit and return its value. +// This supports any number base up to 36. +inline int DigitValue(char digit) { + if ('0' <= digit && digit <= '9') return digit - '0'; + if ('a' <= digit && digit <= 'z') return digit - 'a' + 10; + if ('A' <= digit && digit <= 'Z') return digit - 'A' + 10; + return -1; +} + +// Inline because it's only used in one place. +inline char TranslateEscape(char c) { + switch (c) { + case 'a': return '\a'; + case 'b': return '\b'; + case 'f': return '\f'; + case 'n': return '\n'; + case 'r': return '\r'; + case 't': return '\t'; + case 'v': return '\v'; + case '\\': return '\\'; + case '?': return '\?'; // Trigraphs = :( + case '\'': return '\''; + case '"': return '\"'; + + // We expect escape sequences to have been validated separately. + default: return '?'; + } +} + +} // anonymous namespace + +ErrorCollector::~ErrorCollector() {} + +// =================================================================== + +Tokenizer::Tokenizer(ZeroCopyInputStream* input, + ErrorCollector* error_collector) + : input_(input), + error_collector_(error_collector), + buffer_(NULL), + buffer_size_(0), + buffer_pos_(0), + read_error_(false), + line_(0), + column_(0), + token_start_(-1), + allow_f_after_float_(false), + comment_style_(CPP_COMMENT_STYLE) { + + current_.line = 0; + current_.column = 0; + current_.type = TYPE_START; + + Refresh(); +} + +Tokenizer::~Tokenizer() { + // If we had any buffer left unread, return it to the underlying stream + // so that someone else can read it. + if (buffer_size_ > buffer_pos_) { + input_->BackUp(buffer_size_ - buffer_pos_); + } +} + +// ------------------------------------------------------------------- +// Internal helpers. + +void Tokenizer::NextChar() { + // Update our line and column counters based on the character being + // consumed. + if (current_char_ == '\n') { + ++line_; + column_ = 0; + } else if (current_char_ == '\t') { + column_ += kTabWidth - column_ % kTabWidth; + } else { + ++column_; + } + + // Advance to the next character. + ++buffer_pos_; + if (buffer_pos_ < buffer_size_) { + current_char_ = buffer_[buffer_pos_]; + } else { + Refresh(); + } +} + +void Tokenizer::Refresh() { + if (read_error_) { + current_char_ = '\0'; + return; + } + + // If we're in a token, append the rest of the buffer to it. + if (token_start_ >= 0 && token_start_ < buffer_size_) { + current_.text.append(buffer_ + token_start_, buffer_size_ - token_start_); + token_start_ = 0; + } + + const void* data = NULL; + buffer_ = NULL; + buffer_pos_ = 0; + do { + if (!input_->Next(&data, &buffer_size_)) { + // end of stream (or read error) + buffer_size_ = 0; + read_error_ = true; + current_char_ = '\0'; + return; + } + } while (buffer_size_ == 0); + + buffer_ = static_cast<const char*>(data); + + current_char_ = buffer_[0]; +} + +inline void Tokenizer::StartToken() { + token_start_ = buffer_pos_; + current_.type = TYPE_START; // Just for the sake of initializing it. + current_.text.clear(); + current_.line = line_; + current_.column = column_; +} + +inline void Tokenizer::EndToken() { + // Note: The if() is necessary because some STL implementations crash when + // you call string::append(NULL, 0), presumably because they are trying to + // be helpful by detecting the NULL pointer, even though there's nothing + // wrong with reading zero bytes from NULL. + if (buffer_pos_ != token_start_) { + current_.text.append(buffer_ + token_start_, buffer_pos_ - token_start_); + } + token_start_ = -1; +} + +// ------------------------------------------------------------------- +// Helper methods that consume characters. + +template<typename CharacterClass> +inline bool Tokenizer::LookingAt() { + return CharacterClass::InClass(current_char_); +} + +template<typename CharacterClass> +inline bool Tokenizer::TryConsumeOne() { + if (CharacterClass::InClass(current_char_)) { + NextChar(); + return true; + } else { + return false; + } +} + +inline bool Tokenizer::TryConsume(char c) { + if (current_char_ == c) { + NextChar(); + return true; + } else { + return false; + } +} + +template<typename CharacterClass> +inline void Tokenizer::ConsumeZeroOrMore() { + while (CharacterClass::InClass(current_char_)) { + NextChar(); + } +} + +template<typename CharacterClass> +inline void Tokenizer::ConsumeOneOrMore(const char* error) { + if (!CharacterClass::InClass(current_char_)) { + AddError(error); + } else { + do { + NextChar(); + } while (CharacterClass::InClass(current_char_)); + } +} + +// ------------------------------------------------------------------- +// Methods that read whole patterns matching certain kinds of tokens +// or comments. + +void Tokenizer::ConsumeString(char delimiter) { + while (true) { + switch (current_char_) { + case '\0': + case '\n': { + AddError("String literals cannot cross line boundaries."); + return; + } + + case '\\': { + // An escape sequence. + NextChar(); + if (TryConsumeOne<Escape>()) { + // Valid escape sequence. + } else if (TryConsumeOne<OctalDigit>()) { + // Possibly followed by two more octal digits, but these will + // just be consumed by the main loop anyway so we don't need + // to do so explicitly here. + } else if (TryConsume('x') || TryConsume('X')) { + if (!TryConsumeOne<HexDigit>()) { + AddError("Expected hex digits for escape sequence."); + } + // Possibly followed by another hex digit, but again we don't care. + } else { + AddError("Invalid escape sequence in string literal."); + } + break; + } + + default: { + if (current_char_ == delimiter) { + NextChar(); + return; + } + NextChar(); + break; + } + } + } +} + +Tokenizer::TokenType Tokenizer::ConsumeNumber(bool started_with_zero, + bool started_with_dot) { + bool is_float = false; + + if (started_with_zero && (TryConsume('x') || TryConsume('X'))) { + // A hex number (started with "0x"). + ConsumeOneOrMore<HexDigit>("\"0x\" must be followed by hex digits."); + + } else if (started_with_zero && LookingAt<Digit>()) { + // An octal number (had a leading zero). + ConsumeZeroOrMore<OctalDigit>(); + if (LookingAt<Digit>()) { + AddError("Numbers starting with leading zero must be in octal."); + ConsumeZeroOrMore<Digit>(); + } + + } else { + // A decimal number. + if (started_with_dot) { + is_float = true; + ConsumeZeroOrMore<Digit>(); + } else { + ConsumeZeroOrMore<Digit>(); + + if (TryConsume('.')) { + is_float = true; + ConsumeZeroOrMore<Digit>(); + } + } + + if (TryConsume('e') || TryConsume('E')) { + is_float = true; + TryConsume('-') || TryConsume('+'); + ConsumeOneOrMore<Digit>("\"e\" must be followed by exponent."); + } + + if (allow_f_after_float_ && (TryConsume('f') || TryConsume('F'))) { + is_float = true; + } + } + + if (LookingAt<Letter>()) { + AddError("Need space between number and identifier."); + } else if (current_char_ == '.') { + if (is_float) { + AddError( + "Already saw decimal point or exponent; can't have another one."); + } else { + AddError("Hex and octal numbers must be integers."); + } + } + + return is_float ? TYPE_FLOAT : TYPE_INTEGER; +} + +void Tokenizer::ConsumeLineComment() { + while (current_char_ != '\0' && current_char_ != '\n') { + NextChar(); + } + TryConsume('\n'); +} + +void Tokenizer::ConsumeBlockComment() { + int start_line = line_; + int start_column = column_ - 2; + + while (true) { + while (current_char_ != '\0' && + current_char_ != '*' && + current_char_ != '/') { + NextChar(); + } + + if (TryConsume('*') && TryConsume('/')) { + // End of comment. + break; + } else if (TryConsume('/') && current_char_ == '*') { + // Note: We didn't consume the '*' because if there is a '/' after it + // we want to interpret that as the end of the comment. + AddError( + "\"/*\" inside block comment. Block comments cannot be nested."); + } else if (current_char_ == '\0') { + AddError("End-of-file inside block comment."); + error_collector_->AddError( + start_line, start_column, " Comment started here."); + break; + } + } +} + +// ------------------------------------------------------------------- + +bool Tokenizer::Next() { + TokenType last_token_type = current_.type; + + // Did we skip any characters after the last token? + bool skipped_stuff = false; + + while (!read_error_) { + if (TryConsumeOne<Whitespace>()) { + ConsumeZeroOrMore<Whitespace>(); + + } else if (comment_style_ == CPP_COMMENT_STYLE && TryConsume('/')) { + // Starting a comment? + if (TryConsume('/')) { + ConsumeLineComment(); + } else if (TryConsume('*')) { + ConsumeBlockComment(); + } else { + // Oops, it was just a slash. Return it. + current_.type = TYPE_SYMBOL; + current_.text = "/"; + current_.line = line_; + current_.column = column_ - 1; + return true; + } + + } else if (comment_style_ == SH_COMMENT_STYLE && TryConsume('#')) { + ConsumeLineComment(); + + } else if (LookingAt<Unprintable>() || current_char_ == '\0') { + AddError("Invalid control characters encountered in text."); + NextChar(); + // Skip more unprintable characters, too. But, remember that '\0' is + // also what current_char_ is set to after EOF / read error. We have + // to be careful not to go into an infinite loop of trying to consume + // it, so make sure to check read_error_ explicitly before consuming + // '\0'. + while (TryConsumeOne<Unprintable>() || + (!read_error_ && TryConsume('\0'))) { + // Ignore. + } + + } else { + // Reading some sort of token. + StartToken(); + + if (TryConsumeOne<Letter>()) { + ConsumeZeroOrMore<Alphanumeric>(); + current_.type = TYPE_IDENTIFIER; + } else if (TryConsume('0')) { + current_.type = ConsumeNumber(true, false); + } else if (TryConsume('.')) { + // This could be the beginning of a floating-point number, or it could + // just be a '.' symbol. + + if (TryConsumeOne<Digit>()) { + // It's a floating-point number. + if (last_token_type == TYPE_IDENTIFIER && !skipped_stuff) { + // We don't accept syntax like "blah.123". + error_collector_->AddError(line_, column_ - 2, + "Need space between identifier and decimal point."); + } + current_.type = ConsumeNumber(false, true); + } else { + current_.type = TYPE_SYMBOL; + } + } else if (TryConsumeOne<Digit>()) { + current_.type = ConsumeNumber(false, false); + } else if (TryConsume('\"')) { + ConsumeString('\"'); + current_.type = TYPE_STRING; + } else if (TryConsume('\'')) { + ConsumeString('\''); + current_.type = TYPE_STRING; + } else { + NextChar(); + current_.type = TYPE_SYMBOL; + } + + EndToken(); + return true; + } + + skipped_stuff = true; + } + + // EOF + current_.type = TYPE_END; + current_.text.clear(); + current_.line = line_; + current_.column = column_; + return false; +} + +// ------------------------------------------------------------------- +// Token-parsing helpers. Remember that these don't need to report +// errors since any errors should already have been reported while +// tokenizing. Also, these can assume that whatever text they +// are given is text that the tokenizer actually parsed as a token +// of the given type. + +bool Tokenizer::ParseInteger(const string& text, uint64 max_value, + uint64* output) { + // Sadly, we can't just use strtoul() since it is only 32-bit and strtoull() + // is non-standard. I hate the C standard library. :( + +// return strtoull(text.c_str(), NULL, 0); + + const char* ptr = text.c_str(); + int base = 10; + if (ptr[0] == '0') { + if (ptr[1] == 'x') { + // This is hex. + base = 16; + ptr += 2; + } else { + // This is octal. + base = 8; + } + } + + uint64 result = 0; + for (; *ptr != '\0'; ptr++) { + int digit = DigitValue(*ptr); + GOOGLE_LOG_IF(DFATAL, digit < 0 || digit >= base) + << " Tokenizer::ParseInteger() passed text that could not have been" + " tokenized as an integer: " << CEscape(text); + if (digit > max_value || result > (max_value - digit) / base) { + // Overflow. + return false; + } + result = result * base + digit; + } + + *output = result; + return true; +} + +double Tokenizer::ParseFloat(const string& text) { + const char* start = text.c_str(); + char* end; + double result = NoLocaleStrtod(start, &end); + + // "1e" is not a valid float, but if the tokenizer reads it, it will + // report an error but still return it as a valid token. We need to + // accept anything the tokenizer could possibly return, error or not. + if (*end == 'e' || *end == 'E') { + ++end; + if (*end == '-' || *end == '+') ++end; + } + + // If the Tokenizer had allow_f_after_float_ enabled, the float may be + // suffixed with the letter 'f'. + if (*end == 'f' || *end == 'F') { + ++end; + } + + GOOGLE_LOG_IF(DFATAL, end - start != text.size() || *start == '-') + << " Tokenizer::ParseFloat() passed text that could not have been" + " tokenized as a float: " << CEscape(text); + return result; +} + +void Tokenizer::ParseString(const string& text, string* output) { + output->clear(); + + // Reminder: text[0] is always the quote character. (If text is + // empty, it's invalid, so we'll just return.) + if (text.empty()) { + GOOGLE_LOG(DFATAL) + << " ParseString::ParseString() passed text that could not have been" + " tokenized as a string: " << CEscape(text); + return; + } + + output->reserve(text.size()); + + // Loop through the string copying characters to "output" and + // interpreting escape sequences. Note that any invalid escape + // sequences or other errors were already reported while tokenizing. + // In this case we do not need to produce valid results. + for (const char* ptr = text.c_str() + 1; *ptr != '\0'; ptr++) { + if (*ptr == '\\' && ptr[1] != '\0') { + // An escape sequence. + ++ptr; + + if (OctalDigit::InClass(*ptr)) { + // An octal escape. May one, two, or three digits. + int code = DigitValue(*ptr); + if (OctalDigit::InClass(ptr[1])) { + ++ptr; + code = code * 8 + DigitValue(*ptr); + } + if (OctalDigit::InClass(ptr[1])) { + ++ptr; + code = code * 8 + DigitValue(*ptr); + } + output->push_back(static_cast<char>(code)); + + } else if (*ptr == 'x') { + // A hex escape. May zero, one, or two digits. (The zero case + // will have been caught as an error earlier.) + int code = 0; + if (HexDigit::InClass(ptr[1])) { + ++ptr; + code = DigitValue(*ptr); + } + if (HexDigit::InClass(ptr[1])) { + ++ptr; + code = code * 16 + DigitValue(*ptr); + } + output->push_back(static_cast<char>(code)); + + } else { + // Some other escape code. + output->push_back(TranslateEscape(*ptr)); + } + + } else if (*ptr == text[0]) { + // Ignore quote matching the starting quote. + } else { + output->push_back(*ptr); + } + } + + return; +} + +} // namespace io +} // namespace protobuf +} // namespace google diff --git a/src/google/protobuf/io/tokenizer.h b/src/google/protobuf/io/tokenizer.h new file mode 100644 index 00000000..841564ce --- /dev/null +++ b/src/google/protobuf/io/tokenizer.h @@ -0,0 +1,276 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. +// http://code.google.com/p/protobuf/ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// Class for parsing tokenized text from a ZeroCopyInputStream. + +#ifndef GOOGLE_PROTOBUF_IO_TOKENIZER_H__ +#define GOOGLE_PROTOBUF_IO_TOKENIZER_H__ + +#include <string> +#include <google/protobuf/stubs/common.h> + +namespace google { +namespace protobuf { +namespace io { + +class ZeroCopyInputStream; // zero_copy_stream.h + +// Defined in this file. +class ErrorCollector; +class Tokenizer; + +// Abstract interface for an object which collects the errors that occur +// during parsing. A typical implementation might simply print the errors +// to stdout. +class LIBPROTOBUF_EXPORT ErrorCollector { + public: + inline ErrorCollector() {} + virtual ~ErrorCollector(); + + // Indicates that there was an error in the input at the given line and + // column numbers. The numbers are zero-based, so you may want to add + // 1 to each before printing them. + virtual void AddError(int line, int column, const string& message) = 0; + + private: + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ErrorCollector); +}; + +// This class converts a stream of raw text into a stream of tokens for +// the protocol definition parser to parse. The tokens recognized are +// similar to those that make up the C language; see the TokenType enum for +// precise descriptions. Whitespace and comments are skipped. By default, +// C- and C++-style comments are recognized, but other styles can be used by +// calling set_comment_style(). +class LIBPROTOBUF_EXPORT Tokenizer { + public: + // Construct a Tokenizer that reads and tokenizes text from the given + // input stream and writes errors to the given error_collector. + // The caller keeps ownership of input and error_collector. + Tokenizer(ZeroCopyInputStream* input, ErrorCollector* error_collector); + ~Tokenizer(); + + enum TokenType { + TYPE_START, // Next() has not yet been called. + TYPE_END, // End of input reached. "text" is empty. + + TYPE_IDENTIFIER, // A sequence of letters, digits, and underscores, not + // starting with a digit. It is an error for a number + // to be followed by an identifier with no space in + // between. + TYPE_INTEGER, // A sequence of digits representing an integer. Normally + // the digits are decimal, but a prefix of "0x" indicates + // a hex number and a leading zero indicates octal, just + // like with C numeric literals. A leading negative sign + // is NOT included in the token; it's up to the parser to + // interpret the unary minus operator on its own. + TYPE_FLOAT, // A floating point literal, with a fractional part and/or + // an exponent. Always in decimal. Again, never + // negative. + TYPE_STRING, // A quoted sequence of escaped characters. Either single + // or double quotes can be used, but they must match. + // A string literal cannot cross a line break. + TYPE_SYMBOL, // Any other printable character, like '!' or '+'. + // Symbols are always a single character, so "!+$%" is + // four tokens. + }; + + // Structure representing a token read from the token stream. + struct Token { + TokenType type; + string text; // The exact text of the token as it appeared in + // the input. e.g. tokens of TYPE_STRING will still + // be escaped and in quotes. + + // "line" and "column" specify the position of the first character of + // the token within the input stream. They are zero-based. + int line; + int column; + }; + + // Get the current token. This is updated when Next() is called. Before + // the first call to Next(), current() has type TYPE_START and no contents. + const Token& current(); + + // Advance to the next token. Returns false if the end of the input is + // reached. + bool Next(); + + // Parse helpers --------------------------------------------------- + + // Parses a TYPE_FLOAT token. This never fails, so long as the text actually + // comes from a TYPE_FLOAT token parsed by Tokenizer. If it doesn't, the + // result is undefined (possibly an assert failure). + static double ParseFloat(const string& text); + + // Parses a TYPE_STRING token. This never fails, so long as the text actually + // comes from a TYPE_STRING token parsed by Tokenizer. If it doesn't, the + // result is undefined (possibly an assert failure). + static void ParseString(const string& text, string* output); + + // Parses a TYPE_INTEGER token. Returns false if the result would be + // greater than max_value. Otherwise, returns true and sets *output to the + // result. If the text is not from a Token of type TYPE_INTEGER originally + // parsed by a Tokenizer, the result is undefined (possibly an assert + // failure). + static bool ParseInteger(const string& text, uint64 max_value, + uint64* output); + + // Options --------------------------------------------------------- + + // Set true to allow floats to be suffixed with the letter 'f'. Tokens + // which would otherwise be integers but which have the 'f' suffix will be + // forced to be interpreted as floats. For all other purposes, the 'f' is + // ignored. + void set_allow_f_after_float(bool value) { allow_f_after_float_ = value; } + + // Valid values for set_comment_style(). + enum CommentStyle { + // Line comments begin with "//", block comments are delimited by "/*" and + // "*/". + CPP_COMMENT_STYLE, + // Line comments begin with "#". No way to write block comments. + SH_COMMENT_STYLE + }; + + // Sets the comment style. + void set_comment_style(CommentStyle style) { comment_style_ = style; } + + // ----------------------------------------------------------------- + private: + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Tokenizer); + + Token current_; // Returned by current(). + + ZeroCopyInputStream* input_; + ErrorCollector* error_collector_; + + char current_char_; // == buffer_[buffer_pos_], updated by NextChar(). + const char* buffer_; // Current buffer returned from input_. + int buffer_size_; // Size of buffer_. + int buffer_pos_; // Current position within the buffer. + bool read_error_; // Did we previously encounter a read error? + + // Line and column number of current_char_ within the whole input stream. + int line_; + int column_; + + // Position in buffer_ where StartToken() was called. If the token + // started in the previous buffer, this is zero, and current_.text already + // contains the part of the token from the previous buffer. If not + // currently parsing a token, this is -1. + int token_start_; + + // Options. + bool allow_f_after_float_; + CommentStyle comment_style_; + + // Since we count columns we need to interpret tabs somehow. We'll take + // the standard 8-character definition for lack of any way to do better. + static const int kTabWidth = 8; + + // ----------------------------------------------------------------- + // Helper methods. + + // Consume this character and advance to the next one. + void NextChar(); + + // Read a new buffer from the input. + void Refresh(); + + // Called when the current character is the first character of a new + // token (not including whitespace or comments). + inline void StartToken(); + // Called when the current character is the first character after the + // end of the last token. After this returns, current_.text will + // contain all text consumed since StartToken() was called. + inline void EndToken(); + + // Convenience method to add an error at the current line and column. + void AddError(const string& message) { + error_collector_->AddError(line_, column_, message); + } + + // ----------------------------------------------------------------- + // The following four methods are used to consume tokens of specific + // types. They are actually used to consume all characters *after* + // the first, since the calling function consumes the first character + // in order to decide what kind of token is being read. + + // Read and consume a string, ending when the given delimiter is + // consumed. + void ConsumeString(char delimiter); + + // Read and consume a number, returning TYPE_FLOAT or TYPE_INTEGER + // depending on what was read. This needs to know if the first + // character was a zero in order to correctly recognize hex and octal + // numbers. + // It also needs to know if the first characted was a . to parse floating + // point correctly. + TokenType ConsumeNumber(bool started_with_zero, bool started_with_dot); + + // Consume the rest of a line. + void ConsumeLineComment(); + // Consume until "*/". + void ConsumeBlockComment(); + + // ----------------------------------------------------------------- + // These helper methods make the parsing code more readable. The + // "character classes" refered to are defined at the top of the .cc file. + // Basically it is a C++ class with one method: + // static bool InClass(char c); + // The method returns true if c is a member of this "class", like "Letter" + // or "Digit". + + // Returns true if the current character is of the given character + // class, but does not consume anything. + template<typename CharacterClass> + inline bool LookingAt(); + + // If the current character is in the given class, consume it and return + // true. Otherwise return false. + // e.g. TryConsumeOne<Letter>() + template<typename CharacterClass> + inline bool TryConsumeOne(); + + // Like above, but try to consume the specific character indicated. + inline bool TryConsume(char c); + + // Consume zero or more of the given character class. + template<typename CharacterClass> + inline void ConsumeZeroOrMore(); + + // Consume one or more of the given character class or log the given + // error message. + // e.g. ConsumeOneOrMore<Digit>("Expected digits."); + template<typename CharacterClass> + inline void ConsumeOneOrMore(const char* error); +}; + +// inline methods ==================================================== +inline const Tokenizer::Token& Tokenizer::current() { + return current_; +} + +} // namespace io +} // namespace protobuf + +} // namespace google +#endif // GOOGLE_PROTOBUF_IO_TOKENIZER_H__ diff --git a/src/google/protobuf/io/tokenizer_unittest.cc b/src/google/protobuf/io/tokenizer_unittest.cc new file mode 100644 index 00000000..e2cede7e --- /dev/null +++ b/src/google/protobuf/io/tokenizer_unittest.cc @@ -0,0 +1,706 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. +// http://code.google.com/p/protobuf/ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. + +#include <vector> +#include <math.h> +#include <limits.h> + +#include <google/protobuf/io/tokenizer.h> +#include <google/protobuf/io/zero_copy_stream_impl.h> + +#include <google/protobuf/stubs/common.h> +#include <google/protobuf/stubs/strutil.h> +#include <google/protobuf/stubs/substitute.h> +#include <google/protobuf/testing/googletest.h> +#include <gtest/gtest.h> + +namespace google { +namespace protobuf { +namespace io { +namespace { + +// =================================================================== +// Data-Driven Test Infrastructure + +// TODO(kenton): This is copied from coded_stream_unittest. This is +// temporary until these fetaures are integrated into gTest itself. + +// TEST_1D and TEST_2D are macros I'd eventually like to see added to +// gTest. These macros can be used to declare tests which should be +// run multiple times, once for each item in some input array. TEST_1D +// tests all cases in a single input array. TEST_2D tests all +// combinations of cases from two arrays. The arrays must be statically +// defined such that the GOOGLE_ARRAYSIZE() macro works on them. Example: +// +// int kCases[] = {1, 2, 3, 4} +// TEST_1D(MyFixture, MyTest, kCases) { +// EXPECT_GT(kCases_case, 0); +// } +// +// This test iterates through the numbers 1, 2, 3, and 4 and tests that +// they are all grater than zero. In case of failure, the exact case +// which failed will be printed. The case type must be printable using +// ostream::operator<<. + +#define TEST_1D(FIXTURE, NAME, CASES) \ + class FIXTURE##_##NAME##_DD : public FIXTURE { \ + protected: \ + template <typename CaseType> \ + void DoSingleCase(const CaseType& CASES##_case); \ + }; \ + \ + TEST_F(FIXTURE##_##NAME##_DD, NAME) { \ + for (int i = 0; i < GOOGLE_ARRAYSIZE(CASES); i++) { \ + SCOPED_TRACE(testing::Message() \ + << #CASES " case #" << i << ": " << CASES[i]); \ + DoSingleCase(CASES[i]); \ + } \ + } \ + \ + template <typename CaseType> \ + void FIXTURE##_##NAME##_DD::DoSingleCase(const CaseType& CASES##_case) + +#define TEST_2D(FIXTURE, NAME, CASES1, CASES2) \ + class FIXTURE##_##NAME##_DD : public FIXTURE { \ + protected: \ + template <typename CaseType1, typename CaseType2> \ + void DoSingleCase(const CaseType1& CASES1##_case, \ + const CaseType2& CASES2##_case); \ + }; \ + \ + TEST_F(FIXTURE##_##NAME##_DD, NAME) { \ + for (int i = 0; i < GOOGLE_ARRAYSIZE(CASES1); i++) { \ + for (int j = 0; j < GOOGLE_ARRAYSIZE(CASES2); j++) { \ + SCOPED_TRACE(testing::Message() \ + << #CASES1 " case #" << i << ": " << CASES1[i] << ", " \ + << #CASES2 " case #" << j << ": " << CASES2[j]); \ + DoSingleCase(CASES1[i], CASES2[j]); \ + } \ + } \ + } \ + \ + template <typename CaseType1, typename CaseType2> \ + void FIXTURE##_##NAME##_DD::DoSingleCase(const CaseType1& CASES1##_case, \ + const CaseType2& CASES2##_case) + +// ------------------------------------------------------------------- + +// An input stream that is basically like an ArrayInputStream but sometimes +// returns empty buffers, just to throw us off. +class TestInputStream : public ZeroCopyInputStream { + public: + TestInputStream(const void* data, int size, int block_size) + : array_stream_(data, size, block_size), counter_(0) {} + ~TestInputStream() {} + + // implements ZeroCopyInputStream ---------------------------------- + bool Next(const void** data, int* size) { + // We'll return empty buffers starting with the first buffer, and every + // 3 and 5 buffers after that. + if (counter_ % 3 == 0 || counter_ % 5 == 0) { + *data = NULL; + *size = 0; + ++counter_; + return true; + } else { + ++counter_; + return array_stream_.Next(data, size); + } + } + + void BackUp(int count) { return array_stream_.BackUp(count); } + bool Skip(int count) { return array_stream_.Skip(count); } + int64 ByteCount() const { return array_stream_.ByteCount(); } + + private: + ArrayInputStream array_stream_; + int counter_; +}; + +// ------------------------------------------------------------------- + +// An error collector which simply concatenates all its errors into a big +// block of text which can be checked. +class TestErrorCollector : public ErrorCollector { + public: + TestErrorCollector() {} + ~TestErrorCollector() {} + + string text_; + + // implements ErrorCollector --------------------------------------- + void AddError(int line, int column, const string& message) { + strings::SubstituteAndAppend(&text_, "$0:$1: $2\n", + line, column, message); + } +}; + +// ------------------------------------------------------------------- + +// We test each operation over a variety of block sizes to insure that +// we test cases where reads cross buffer boundaries as well as cases +// where they don't. This is sort of a brute-force approach to this, +// but it's easy to write and easy to understand. +const int kBlockSizes[] = {1, 2, 3, 5, 7, 13, 32, 1024}; + +class TokenizerTest : public testing::Test { + protected: + // For easy testing. + uint64 ParseInteger(const string& text) { + uint64 result; + EXPECT_TRUE(Tokenizer::ParseInteger(text, kuint64max, &result)); + return result; + } +}; + +// =================================================================== + +// These tests causes gcc 3.3.5 (and earlier?) to give the cryptic error: +// "sorry, unimplemented: `method_call_expr' not supported by dump_expr" +#if !defined(__GNUC__) || __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 3) + +// In each test case, the entire input text should parse as a single token +// of the given type. +struct SimpleTokenCase { + string input; + Tokenizer::TokenType type; +}; + +inline ostream& operator<<(ostream& out, + const SimpleTokenCase& test_case) { + return out << CEscape(test_case.input); +} + +SimpleTokenCase kSimpleTokenCases[] = { + // Test identifiers. + { "hello", Tokenizer::TYPE_IDENTIFIER }, + + // Test integers. + { "123", Tokenizer::TYPE_INTEGER }, + { "0xab6", Tokenizer::TYPE_INTEGER }, + { "0XAB6", Tokenizer::TYPE_INTEGER }, + { "0X1234567", Tokenizer::TYPE_INTEGER }, + { "0x89abcdef", Tokenizer::TYPE_INTEGER }, + { "0x89ABCDEF", Tokenizer::TYPE_INTEGER }, + { "01234567", Tokenizer::TYPE_INTEGER }, + + // Test floats. + { "123.45", Tokenizer::TYPE_FLOAT }, + { "1.", Tokenizer::TYPE_FLOAT }, + { "1e3", Tokenizer::TYPE_FLOAT }, + { "1E3", Tokenizer::TYPE_FLOAT }, + { "1e-3", Tokenizer::TYPE_FLOAT }, + { "1e+3", Tokenizer::TYPE_FLOAT }, + { "1.e3", Tokenizer::TYPE_FLOAT }, + { "1.2e3", Tokenizer::TYPE_FLOAT }, + { ".1", Tokenizer::TYPE_FLOAT }, + { ".1e3", Tokenizer::TYPE_FLOAT }, + { ".1e-3", Tokenizer::TYPE_FLOAT }, + { ".1e+3", Tokenizer::TYPE_FLOAT }, + + // Test strings. + { "'hello'", Tokenizer::TYPE_STRING }, + { "\"foo\"", Tokenizer::TYPE_STRING }, + { "'a\"b'", Tokenizer::TYPE_STRING }, + { "\"a'b\"", Tokenizer::TYPE_STRING }, + { "'a\\'b'", Tokenizer::TYPE_STRING }, + { "\"a\\\"b\"", Tokenizer::TYPE_STRING }, + { "'\\xf'", Tokenizer::TYPE_STRING }, + { "'\\0'", Tokenizer::TYPE_STRING }, + + // Test symbols. + { "+", Tokenizer::TYPE_SYMBOL }, + { ".", Tokenizer::TYPE_SYMBOL }, +}; + +TEST_2D(TokenizerTest, SimpleTokens, kSimpleTokenCases, kBlockSizes) { + // Set up the tokenizer. + TestInputStream input(kSimpleTokenCases_case.input.data(), + kSimpleTokenCases_case.input.size(), + kBlockSizes_case); + TestErrorCollector error_collector; + Tokenizer tokenizer(&input, &error_collector); + + // Before Next() is called, the initial token should always be TYPE_START. + EXPECT_EQ(Tokenizer::TYPE_START, tokenizer.current().type); + EXPECT_EQ("", tokenizer.current().text); + EXPECT_EQ(0, tokenizer.current().line); + EXPECT_EQ(0, tokenizer.current().column); + + // Parse the token. + ASSERT_TRUE(tokenizer.Next()); + + // Check that it has the right type. + EXPECT_EQ(kSimpleTokenCases_case.type, tokenizer.current().type); + // Check that it contains the complete input text. + EXPECT_EQ(kSimpleTokenCases_case.input, tokenizer.current().text); + // Check that it is located at the beginning of the input + EXPECT_EQ(0, tokenizer.current().line); + EXPECT_EQ(0, tokenizer.current().column); + + // There should be no more input. + EXPECT_FALSE(tokenizer.Next()); + + // After Next() returns false, the token should have type TYPE_END. + EXPECT_EQ(Tokenizer::TYPE_END, tokenizer.current().type); + EXPECT_EQ("", tokenizer.current().text); + EXPECT_EQ(0, tokenizer.current().line); + EXPECT_EQ(kSimpleTokenCases_case.input.size(), tokenizer.current().column); + + // There should be no errors. + EXPECT_TRUE(error_collector.text_.empty()); +} + +TEST_1D(TokenizerTest, FloatSuffix, kBlockSizes) { + // Test the "allow_f_after_float" option. + + // Set up the tokenizer. + const char* text = "1f 2.5f 6e3f 7F"; + TestInputStream input(text, strlen(text), kBlockSizes_case); + TestErrorCollector error_collector; + Tokenizer tokenizer(&input, &error_collector); + tokenizer.set_allow_f_after_float(true); + + // Advance through tokens and check that they are parsed as expected. + ASSERT_TRUE(tokenizer.Next()); + EXPECT_EQ(tokenizer.current().text, "1f"); + EXPECT_EQ(tokenizer.current().type, Tokenizer::TYPE_FLOAT); + ASSERT_TRUE(tokenizer.Next()); + EXPECT_EQ(tokenizer.current().text, "2.5f"); + EXPECT_EQ(tokenizer.current().type, Tokenizer::TYPE_FLOAT); + ASSERT_TRUE(tokenizer.Next()); + EXPECT_EQ(tokenizer.current().text, "6e3f"); + EXPECT_EQ(tokenizer.current().type, Tokenizer::TYPE_FLOAT); + ASSERT_TRUE(tokenizer.Next()); + EXPECT_EQ(tokenizer.current().text, "7F"); + EXPECT_EQ(tokenizer.current().type, Tokenizer::TYPE_FLOAT); + + // There should be no more input. + EXPECT_FALSE(tokenizer.Next()); + // There should be no errors. + EXPECT_TRUE(error_collector.text_.empty()); +} + +#endif + +// ------------------------------------------------------------------- + +// In each case, the input is parsed to produce a list of tokens. The +// last token in "output" must have type TYPE_END. +struct MultiTokenCase { + string input; + Tokenizer::Token output[10]; // The compiler wants a constant array + // size for initialization to work. There + // is no reason this can't be increased if + // needed. +}; + +inline ostream& operator<<(ostream& out, + const MultiTokenCase& test_case) { + return out << CEscape(test_case.input); +} + +MultiTokenCase kMultiTokenCases[] = { + // Test empty input. + { "", { + { Tokenizer::TYPE_END , "" , 0, 0 }, + }}, + + // Test all token types at the same time. + { "foo 1 1.2 + 'bar'", { + { Tokenizer::TYPE_IDENTIFIER, "foo" , 0, 0 }, + { Tokenizer::TYPE_INTEGER , "1" , 0, 4 }, + { Tokenizer::TYPE_FLOAT , "1.2" , 0, 6 }, + { Tokenizer::TYPE_SYMBOL , "+" , 0, 10 }, + { Tokenizer::TYPE_STRING , "'bar'", 0, 12 }, + { Tokenizer::TYPE_END , "" , 0, 17 }, + }}, + + // Test that consecutive symbols are parsed as separate tokens. + { "!@+%", { + { Tokenizer::TYPE_SYMBOL , "!" , 0, 0 }, + { Tokenizer::TYPE_SYMBOL , "@" , 0, 1 }, + { Tokenizer::TYPE_SYMBOL , "+" , 0, 2 }, + { Tokenizer::TYPE_SYMBOL , "%" , 0, 3 }, + { Tokenizer::TYPE_END , "" , 0, 4 }, + }}, + + // Test that newlines affect line numbers correctly. + { "foo bar\nrab oof", { + { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0 }, + { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 4 }, + { Tokenizer::TYPE_IDENTIFIER, "rab", 1, 0 }, + { Tokenizer::TYPE_IDENTIFIER, "oof", 1, 4 }, + { Tokenizer::TYPE_END , "" , 1, 7 }, + }}, + + // Test that tabs affect column numbers correctly. + { "foo\tbar \tbaz", { + { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0 }, + { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 8 }, + { Tokenizer::TYPE_IDENTIFIER, "baz", 0, 16 }, + { Tokenizer::TYPE_END , "" , 0, 19 }, + }}, + + // Test that line comments are ignored. + { "foo // This is a comment\n" + "bar // This is another comment", { + { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0 }, + { Tokenizer::TYPE_IDENTIFIER, "bar", 1, 0 }, + { Tokenizer::TYPE_END , "" , 1, 30 }, + }}, + + // Test that block comments are ignored. + { "foo /* This is a block comment */ bar", { + { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0 }, + { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 34 }, + { Tokenizer::TYPE_END , "" , 0, 37 }, + }}, + + // Test that sh-style comments are not ignored by default. + { "foo # bar\n" + "baz", { + { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0 }, + { Tokenizer::TYPE_SYMBOL , "#" , 0, 4 }, + { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 6 }, + { Tokenizer::TYPE_IDENTIFIER, "baz", 1, 0 }, + { Tokenizer::TYPE_END , "" , 1, 3 }, + }}, +}; + +TEST_2D(TokenizerTest, MultipleTokens, kMultiTokenCases, kBlockSizes) { + // Set up the tokenizer. + TestInputStream input(kMultiTokenCases_case.input.data(), + kMultiTokenCases_case.input.size(), + kBlockSizes_case); + TestErrorCollector error_collector; + Tokenizer tokenizer(&input, &error_collector); + + // Before Next() is called, the initial token should always be TYPE_START. + EXPECT_EQ(Tokenizer::TYPE_START, tokenizer.current().type); + EXPECT_EQ("", tokenizer.current().text); + EXPECT_EQ(0, tokenizer.current().line); + EXPECT_EQ(0, tokenizer.current().column); + + // Loop through all expected tokens. + int i = 0; + Tokenizer::Token token; + do { + token = kMultiTokenCases_case.output[i++]; + + SCOPED_TRACE(testing::Message() << "Token #" << i << ": " << token.text); + + // Next() should only return false when it hits the end token. + if (token.type != Tokenizer::TYPE_END) { + ASSERT_TRUE(tokenizer.Next()); + } else { + ASSERT_FALSE(tokenizer.Next()); + } + + // Check that the token matches the expected one. + EXPECT_EQ(token.type, tokenizer.current().type); + EXPECT_EQ(token.text, tokenizer.current().text); + EXPECT_EQ(token.line, tokenizer.current().line); + EXPECT_EQ(token.column, tokenizer.current().column); + + } while (token.type != Tokenizer::TYPE_END); + + // There should be no errors. + EXPECT_TRUE(error_collector.text_.empty()); +} + +// This test causes gcc 3.3.5 (and earlier?) to give the cryptic error: +// "sorry, unimplemented: `method_call_expr' not supported by dump_expr" +#if !defined(__GNUC__) || __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 3) + +TEST_1D(TokenizerTest, ShCommentStyle, kBlockSizes) { + // Test the "comment_style" option. + + const char* text = "foo # bar\n" + "baz // qux\n" + "corge /* grault */\n" + "garply"; + const char* const kTokens[] = {"foo", // "# bar" is ignored + "baz", "/", "/", "qux", + "corge", "/", "*", "grault", "*", "/", + "garply"}; + + // Set up the tokenizer. + TestInputStream input(text, strlen(text), kBlockSizes_case); + TestErrorCollector error_collector; + Tokenizer tokenizer(&input, &error_collector); + tokenizer.set_comment_style(Tokenizer::SH_COMMENT_STYLE); + + // Advance through tokens and check that they are parsed as expected. + for (int i = 0; i < GOOGLE_ARRAYSIZE(kTokens); i++) { + EXPECT_TRUE(tokenizer.Next()); + EXPECT_EQ(tokenizer.current().text, kTokens[i]); + } + + // There should be no more input. + EXPECT_FALSE(tokenizer.Next()); + // There should be no errors. + EXPECT_TRUE(error_collector.text_.empty()); +} + +#endif + +// ------------------------------------------------------------------- + +// Test parse helpers. It's not really worth setting up a full data-driven +// test here. +TEST_F(TokenizerTest, ParseInteger) { + EXPECT_EQ(0, ParseInteger("0")); + EXPECT_EQ(123, ParseInteger("123")); + EXPECT_EQ(0xabcdef12u, ParseInteger("0xabcdef12")); + EXPECT_EQ(0xabcdef12u, ParseInteger("0xABCDEF12")); + EXPECT_EQ(kuint64max, ParseInteger("0xFFFFFFFFFFFFFFFF")); + EXPECT_EQ(01234567, ParseInteger("01234567")); + + // Test invalid integers that may still be tokenized as integers. + EXPECT_EQ(0, ParseInteger("0x")); + +#ifdef GTEST_HAS_DEATH_TEST // death tests do not work on Windows yet + // Test invalid integers that will never be tokenized as integers. + EXPECT_DEBUG_DEATH(ParseInteger("zxy"), + "passed text that could not have been tokenized as an integer"); + EXPECT_DEBUG_DEATH(ParseInteger("1.2"), + "passed text that could not have been tokenized as an integer"); + EXPECT_DEBUG_DEATH(ParseInteger("08"), + "passed text that could not have been tokenized as an integer"); + EXPECT_DEBUG_DEATH(ParseInteger("0xg"), + "passed text that could not have been tokenized as an integer"); + EXPECT_DEBUG_DEATH(ParseInteger("-1"), + "passed text that could not have been tokenized as an integer"); +#endif // GTEST_HAS_DEATH_TEST + + // Test overflows. + uint64 i; + EXPECT_TRUE (Tokenizer::ParseInteger("0", 0, &i)); + EXPECT_FALSE(Tokenizer::ParseInteger("1", 0, &i)); + EXPECT_TRUE (Tokenizer::ParseInteger("1", 1, &i)); + EXPECT_TRUE (Tokenizer::ParseInteger("12345", 12345, &i)); + EXPECT_FALSE(Tokenizer::ParseInteger("12346", 12345, &i)); + EXPECT_TRUE (Tokenizer::ParseInteger("0xFFFFFFFFFFFFFFFF" , kuint64max, &i)); + EXPECT_FALSE(Tokenizer::ParseInteger("0x10000000000000000", kuint64max, &i)); +} + +TEST_F(TokenizerTest, ParseFloat) { + EXPECT_DOUBLE_EQ(1 , Tokenizer::ParseFloat("1.")); + EXPECT_DOUBLE_EQ(1e3 , Tokenizer::ParseFloat("1e3")); + EXPECT_DOUBLE_EQ(1e3 , Tokenizer::ParseFloat("1E3")); + EXPECT_DOUBLE_EQ(1.5e3, Tokenizer::ParseFloat("1.5e3")); + EXPECT_DOUBLE_EQ(.1 , Tokenizer::ParseFloat(".1")); + EXPECT_DOUBLE_EQ(.25 , Tokenizer::ParseFloat(".25")); + EXPECT_DOUBLE_EQ(.1e3 , Tokenizer::ParseFloat(".1e3")); + EXPECT_DOUBLE_EQ(.25e3, Tokenizer::ParseFloat(".25e3")); + EXPECT_DOUBLE_EQ(.1e+3, Tokenizer::ParseFloat(".1e+3")); + EXPECT_DOUBLE_EQ(.1e-3, Tokenizer::ParseFloat(".1e-3")); + EXPECT_DOUBLE_EQ(5 , Tokenizer::ParseFloat("5")); + EXPECT_DOUBLE_EQ(6e-12, Tokenizer::ParseFloat("6e-12")); + EXPECT_DOUBLE_EQ(1.2 , Tokenizer::ParseFloat("1.2")); + EXPECT_DOUBLE_EQ(1.e2 , Tokenizer::ParseFloat("1.e2")); + + // Test invalid integers that may still be tokenized as integers. + EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1e")); + EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1e-")); + EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1.e")); + + // Test 'f' suffix. + EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1f")); + EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1.0f")); + EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1F")); + + // These should parse successfully even though they are out of range. + // Overflows become infinity and underflows become zero. + EXPECT_EQ( 0.0, Tokenizer::ParseFloat("1e-9999999999999999999999999999")); + EXPECT_EQ(HUGE_VAL, Tokenizer::ParseFloat("1e+9999999999999999999999999999")); + +#ifdef GTEST_HAS_DEATH_TEST // death tests do not work on Windows yet + // Test invalid integers that will never be tokenized as integers. + EXPECT_DEBUG_DEATH(Tokenizer::ParseFloat("zxy"), + "passed text that could not have been tokenized as a float"); + EXPECT_DEBUG_DEATH(Tokenizer::ParseFloat("1-e0"), + "passed text that could not have been tokenized as a float"); + EXPECT_DEBUG_DEATH(Tokenizer::ParseFloat("-1.0"), + "passed text that could not have been tokenized as a float"); +#endif // GTEST_HAS_DEATH_TEST +} + +TEST_F(TokenizerTest, ParseString) { + string output; + Tokenizer::ParseString("'hello'", &output); + EXPECT_EQ("hello", output); + Tokenizer::ParseString("\"blah\\nblah2\"", &output); + EXPECT_EQ("blah\nblah2", output); + Tokenizer::ParseString("'\\1x\\1\\123\\739\\52\\334n\\3'", &output); + EXPECT_EQ("\1x\1\123\739\52\334n\3", output); + Tokenizer::ParseString("'\\x20\\x4'", &output); + EXPECT_EQ("\x20\x4", output); + + // Test invalid strings that may still be tokenized as strings. + Tokenizer::ParseString("\"\\a\\l\\v\\t", &output); // \l is invalid + EXPECT_EQ("\a?\v\t", output); + Tokenizer::ParseString("'", &output); + EXPECT_EQ("", output); + Tokenizer::ParseString("'\\", &output); + EXPECT_EQ("\\", output); + + // Test invalid strings that will never be tokenized as strings. +#ifdef GTEST_HAS_DEATH_TEST // death tests do not work on Windows yet + EXPECT_DEBUG_DEATH(Tokenizer::ParseString("", &output), + "passed text that could not have been tokenized as a string"); +#endif // GTEST_HAS_DEATH_TEST +} + +// ------------------------------------------------------------------- + +// Each case parses some input text, ignoring the tokens produced, and +// checks that the error output matches what is expected. +struct ErrorCase { + string input; + bool recoverable; // True if the tokenizer should be able to recover and + // parse more tokens after seeing this error. Cases + // for which this is true must end with "foo" as + // the last token, which the test will check for. + const char* errors; +}; + +inline ostream& operator<<(ostream& out, + const ErrorCase& test_case) { + return out << CEscape(test_case.input); +} + +ErrorCase kErrorCases[] = { + // String errors. + { "'\\l' foo", true, + "0:2: Invalid escape sequence in string literal.\n" }, + { "'\\x' foo", true, + "0:3: Expected hex digits for escape sequence.\n" }, + { "'foo", false, + "0:4: String literals cannot cross line boundaries.\n" }, + { "'bar\nfoo", true, + "0:4: String literals cannot cross line boundaries.\n" }, + + // Integer errors. + { "123foo", true, + "0:3: Need space between number and identifier.\n" }, + + // Hex/octal errors. + { "0x foo", true, + "0:2: \"0x\" must be followed by hex digits.\n" }, + { "0541823 foo", true, + "0:4: Numbers starting with leading zero must be in octal.\n" }, + { "0x123z foo", true, + "0:5: Need space between number and identifier.\n" }, + { "0x123.4 foo", true, + "0:5: Hex and octal numbers must be integers.\n" }, + { "0123.4 foo", true, + "0:4: Hex and octal numbers must be integers.\n" }, + + // Float errors. + { "1e foo", true, + "0:2: \"e\" must be followed by exponent.\n" }, + { "1e- foo", true, + "0:3: \"e\" must be followed by exponent.\n" }, + { "1.2.3 foo", true, + "0:3: Already saw decimal point or exponent; can't have another one.\n" }, + { "1e2.3 foo", true, + "0:3: Already saw decimal point or exponent; can't have another one.\n" }, + { "a.1 foo", true, + "0:1: Need space between identifier and decimal point.\n" }, + // allow_f_after_float not enabled, so this should be an error. + { "1.0f foo", true, + "0:3: Need space between number and identifier.\n" }, + + // Block comment errors. + { "/*", false, + "0:2: End-of-file inside block comment.\n" + "0:0: Comment started here.\n"}, + { "/*/*/ foo", true, + "0:3: \"/*\" inside block comment. Block comments cannot be nested.\n"}, + + // Control characters. Multiple consecutive control characters should only + // produce one error. + { "\b foo", true, + "0:0: Invalid control characters encountered in text.\n" }, + { "\b\b foo", true, + "0:0: Invalid control characters encountered in text.\n" }, + + // Check that control characters at end of input don't result in an + // infinite loop. + { "\b", false, + "0:0: Invalid control characters encountered in text.\n" }, + + // Check recovery from '\0'. We have to explicitly specify the length of + // these strings because otherwise the string constructor will just call + // strlen() which will see the first '\0' and think that is the end of the + // string. + { string("\0foo", 4), true, + "0:0: Invalid control characters encountered in text.\n" }, + { string("\0\0foo", 5), true, + "0:0: Invalid control characters encountered in text.\n" }, +}; + +TEST_2D(TokenizerTest, Errors, kErrorCases, kBlockSizes) { + // Set up the tokenizer. + TestInputStream input(kErrorCases_case.input.data(), + kErrorCases_case.input.size(), + kBlockSizes_case); + TestErrorCollector error_collector; + Tokenizer tokenizer(&input, &error_collector); + + // Ignore all input, except remember if the last token was "foo". + bool last_was_foo = false; + while (tokenizer.Next()) { + last_was_foo = tokenizer.current().text == "foo"; + } + + // Check that the errors match what was expected. + EXPECT_EQ(error_collector.text_, kErrorCases_case.errors); + + // If the error was recoverable, make sure we saw "foo" after it. + if (kErrorCases_case.recoverable) { + EXPECT_TRUE(last_was_foo); + } +} + +// ------------------------------------------------------------------- + +TEST_1D(TokenizerTest, BackUpOnDestruction, kBlockSizes) { + string text = "foo bar"; + TestInputStream input(text.data(), text.size(), kBlockSizes_case); + + // Create a tokenizer, read one token, then destroy it. + { + TestErrorCollector error_collector; + Tokenizer tokenizer(&input, &error_collector); + + tokenizer.Next(); + } + + // Only "foo" should have been read. + EXPECT_EQ(strlen("foo"), input.ByteCount()); +} + +} // namespace +} // namespace io +} // namespace protobuf +} // namespace google diff --git a/src/google/protobuf/io/zero_copy_stream.cc b/src/google/protobuf/io/zero_copy_stream.cc new file mode 100644 index 00000000..80559c4a --- /dev/null +++ b/src/google/protobuf/io/zero_copy_stream.cc @@ -0,0 +1,34 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. +// http://code.google.com/p/protobuf/ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. + +#include <google/protobuf/io/zero_copy_stream.h> + + +namespace google { +namespace protobuf { +namespace io { + +ZeroCopyInputStream::~ZeroCopyInputStream() {} +ZeroCopyOutputStream::~ZeroCopyOutputStream() {} + + +} // namespace io +} // namespace protobuf +} // namespace google diff --git a/src/google/protobuf/io/zero_copy_stream.h b/src/google/protobuf/io/zero_copy_stream.h new file mode 100644 index 00000000..bce5f2d3 --- /dev/null +++ b/src/google/protobuf/io/zero_copy_stream.h @@ -0,0 +1,224 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. +// http://code.google.com/p/protobuf/ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// This file contains the ZeroCopyInputStream and ZeroCopyOutputStream +// interfaces, which represent abstract I/O streams to and from which +// protocol buffers can be read and written. For a few simple +// implementations of these interfaces, see zero_copy_stream_impl.h. +// +// These interfaces are different from classic I/O streams in that they +// try to minimize the amount of data copying that needs to be done. +// To accomplish this, responsibility for allocating buffers is moved to +// the stream object, rather than being the responsibility of the caller. +// So, the stream can return a buffer which actually points directly into +// the final data structure where the bytes are to be stored, and the caller +// can interact directly with that buffer, eliminating an intermediate copy +// operation. +// +// As an example, consider the common case in which you are reading bytes +// from an array that is already in memory (or perhaps an mmap()ed file). +// With classic I/O streams, you would do something like: +// char buffer[BUFFER_SIZE]; +// input->Read(buffer, BUFFER_SIZE); +// DoSomething(buffer, BUFFER_SIZE); +// Then, the stream basically just calls memcpy() to copy the data from +// the array into your buffer. With a ZeroCopyInputStream, you would do +// this instead: +// const void* buffer; +// int size; +// input->Next(&buffer, &size); +// DoSomething(buffer, size); +// Here, no copy is performed. The input stream returns a pointer directly +// into the backing array, and the caller ends up reading directly from it. +// +// If you want to be able to read the old-fashion way, you can create +// a CodedInputStream or CodedOutputStream wrapping these objects and use +// their ReadRaw()/WriteRaw() methods. These will, of course, add a copy +// step, but Coded*Stream will handle buffering so at least it will be +// reasonably efficient. +// +// ZeroCopyInputStream example: +// // Read in a file and print its contents to stdout. +// int fd = open("myfile", O_RDONLY); +// ZeroCopyInputStream* input = new FileInputStream(fd); +// +// const void* buffer; +// int size; +// while (input->Next(&buffer, &size)) { +// cout.write(buffer, size); +// } +// +// delete input; +// close(fd); +// +// ZeroCopyOutputStream example: +// // Copy the contents of "infile" to "outfile", using plain read() for +// // "infile" but a ZeroCopyOutputStream for "outfile". +// int infd = open("infile", O_RDONLY); +// int outfd = open("outfile", O_WRONLY); +// ZeroCopyInputStream* output = new FileOutputStream(outfd); +// +// void* buffer; +// int size; +// while (output->Next(&buffer, &size)) { +// int bytes = read(infd, buffer, size); +// if (bytes < size) { +// // Reached EOF. +// output->BackUp(size - bytes); +// break; +// } +// } +// +// delete output; +// close(infd); +// close(outfd); + +#ifndef GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_H__ +#define GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_H__ + +#include <string> +#include <google/protobuf/stubs/common.h> + +namespace google { + +namespace protobuf { +namespace io { + +// Defined in this file. +class ZeroCopyInputStream; +class ZeroCopyOutputStream; + +// Abstract interface similar to an input stream but designed to minimize +// copying. +class LIBPROTOBUF_EXPORT ZeroCopyInputStream { + public: + inline ZeroCopyInputStream() {} + virtual ~ZeroCopyInputStream(); + + // Obtains a chunk of data from the stream. + // + // Preconditions: + // * "size" and "data" are not NULL. + // + // Postconditions: + // * If the returned value is false, there is no more data to return or + // an error occurred. All errors are permanent. + // * Otherwise, "size" points to the actual number of bytes read and "data" + // points to a pointer to a buffer containing these bytes. + // * Ownership of this buffer remains with the stream, and the buffer + // remains valid only until some other method of the stream is called + // or the stream is destroyed. + // * It is legal for the returned buffer to have zero size, as long + // as repeatedly calling Next() eventually yields a buffer with non-zero + // size. + virtual bool Next(const void** data, int* size) = 0; + + // Backs up a number of bytes, so that the next call to Next() returns + // data again that was already returned by the last call to Next(). This + // is useful when writing procedures that are only supposed to read up + // to a certain point in the input, then return. If Next() returns a + // buffer that goes beyond what you wanted to read, you can use BackUp() + // to return to the point where you intended to finish. + // + // Preconditions: + // * The last method called must have been Next(). + // * count must be less than or equal to the size of the last buffer + // returned by Next(). + // + // Postconditions: + // * The last "count" bytes of the last buffer returned by Next() will be + // pushed back into the stream. Subsequent calls to Next() will return + // the same data again before producing new data. + virtual void BackUp(int count) = 0; + + // Skips a number of bytes. Returns false if the end of the stream is + // reached or some input error occurred. In the end-of-stream case, the + // stream is advanced to the end of the stream (so ByteCount() will return + // the total size of the stream). + virtual bool Skip(int count) = 0; + + // Returns the total number of bytes read since this object was created. + virtual int64 ByteCount() const = 0; + + + private: + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ZeroCopyInputStream); +}; + +// Abstract interface similar to an output stream but designed to minimize +// copying. +class LIBPROTOBUF_EXPORT ZeroCopyOutputStream { + public: + inline ZeroCopyOutputStream() {} + virtual ~ZeroCopyOutputStream(); + + // Obtains a buffer into which data can be written. Any data written + // into this buffer will eventually (maybe instantly, maybe later on) + // be written to the output. + // + // Preconditions: + // * "size" and "data" are not NULL. + // + // Postconditions: + // * If the returned value is false, an error occurred. All errors are + // permanent. + // * Otherwise, "size" points to the actual number of bytes in the buffer + // and "data" points to the buffer. + // * Ownership of this buffer remains with the stream, and the buffer + // remains valid only until some other method of the stream is called + // or the stream is destroyed. + // * Any data which the caller stores in this buffer will eventually be + // written to the output (unless BackUp() is called). + // * It is legal for the returned buffer to have zero size, as long + // as repeatedly calling Next() eventually yields a buffer with non-zero + // size. + virtual bool Next(void** data, int* size) = 0; + + // Backs up a number of bytes, so that the end of the last buffer returned + // by Next() is not actually written. This is needed when you finish + // writing all the data you want to write, but the last buffer was bigger + // than you needed. You don't want to write a bunch of garbage after the + // end of your data, so you use BackUp() to back up. + // + // Preconditions: + // * The last method called must have been Next(). + // * count must be less than or equal to the size of the last buffer + // returned by Next(). + // * The caller must not have written anything to the last "count" bytes + // of that buffer. + // + // Postconditions: + // * The last "count" bytes of the last buffer returned by Next() will be + // ignored. + virtual void BackUp(int count) = 0; + + // Returns the total number of bytes written since this object was created. + virtual int64 ByteCount() const = 0; + + + private: + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ZeroCopyOutputStream); +}; + +} // namespace io +} // namespace protobuf + +} // namespace google +#endif // GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_H__ diff --git a/src/google/protobuf/io/zero_copy_stream_impl.cc b/src/google/protobuf/io/zero_copy_stream_impl.cc new file mode 100644 index 00000000..7ff84460 --- /dev/null +++ b/src/google/protobuf/io/zero_copy_stream_impl.cc @@ -0,0 +1,793 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. +// http://code.google.com/p/protobuf/ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. + +#ifdef _MSC_VER +#include <io.h> +#else +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#endif +#include <errno.h> +#include <iostream> +#include <google/protobuf/io/zero_copy_stream_impl.h> +#include <google/protobuf/stubs/common.h> +#include <google/protobuf/stubs/stl_util-inl.h> + +namespace google { +namespace protobuf { +namespace io { + +#ifdef _WIN32 +// Win32 lseek is broken: If invoked on a non-seekable file descriptor, its +// return value is undefined. We re-define it to always produce an error. +#define lseek(fd, offset, origin) ((off_t)-1) +#endif + +namespace { + + +// EINTR sucks. +int close_no_eintr(int fd) { + int result; + do { + result = close(fd); + } while (result < 0 && errno == EINTR); + return result; +} + +// Default block size for Copying{In,Out}putStreamAdaptor. +static const int kDefaultBlockSize = 8192; + +} // namespace + +// =================================================================== + +ArrayInputStream::ArrayInputStream(const void* data, int size, + int block_size) + : data_(reinterpret_cast<const uint8*>(data)), + size_(size), + block_size_(block_size > 0 ? block_size : size), + position_(0), + last_returned_size_(0) { +} + +ArrayInputStream::~ArrayInputStream() { +} + +bool ArrayInputStream::Next(const void** data, int* size) { + if (position_ < size_) { + last_returned_size_ = min(block_size_, size_ - position_); + *data = data_ + position_; + *size = last_returned_size_; + position_ += last_returned_size_; + return true; + } else { + // We're at the end of the array. + last_returned_size_ = 0; // Don't let caller back up. + return false; + } +} + +void ArrayInputStream::BackUp(int count) { + GOOGLE_CHECK_GT(last_returned_size_, 0) + << "BackUp() can only be called after a successful Next()."; + GOOGLE_CHECK_LE(count, last_returned_size_); + GOOGLE_CHECK_GE(count, 0); + position_ -= count; + last_returned_size_ = 0; // Don't let caller back up further. +} + +bool ArrayInputStream::Skip(int count) { + GOOGLE_CHECK_GE(count, 0); + last_returned_size_ = 0; // Don't let caller back up. + if (count > size_ - position_) { + position_ = size_; + return false; + } else { + position_ += count; + return true; + } +} + +int64 ArrayInputStream::ByteCount() const { + return position_; +} + + +// =================================================================== + +ArrayOutputStream::ArrayOutputStream(void* data, int size, int block_size) + : data_(reinterpret_cast<uint8*>(data)), + size_(size), + block_size_(block_size > 0 ? block_size : size), + position_(0), + last_returned_size_(0) { +} + +ArrayOutputStream::~ArrayOutputStream() { +} + +bool ArrayOutputStream::Next(void** data, int* size) { + if (position_ < size_) { + last_returned_size_ = min(block_size_, size_ - position_); + *data = data_ + position_; + *size = last_returned_size_; + position_ += last_returned_size_; + return true; + } else { + // We're at the end of the array. + last_returned_size_ = 0; // Don't let caller back up. + return false; + } +} + +void ArrayOutputStream::BackUp(int count) { + GOOGLE_CHECK_GT(last_returned_size_, 0) + << "BackUp() can only be called after a successful Next()."; + GOOGLE_CHECK_LE(count, last_returned_size_); + GOOGLE_CHECK_GE(count, 0); + position_ -= count; + last_returned_size_ = 0; // Don't let caller back up further. +} + +int64 ArrayOutputStream::ByteCount() const { + return position_; +} + +// =================================================================== + +StringOutputStream::StringOutputStream(string* target) + : target_(target) { +} + +StringOutputStream::~StringOutputStream() { +} + +bool StringOutputStream::Next(void** data, int* size) { + int old_size = target_->size(); + + // Grow the string. + if (old_size < target_->capacity()) { + // Resize the string to match its capacity, since we can get away + // without a memory allocation this way. + target_->resize(target_->capacity()); + } else { + // Size has reached capacity, so double the size. Also make sure + // that the new size is at least kMinimumSize. + target_->resize( + max(old_size * 2, + kMinimumSize + 0)); // "+ 0" works around GCC4 weirdness. + } + + *data = string_as_array(target_) + old_size; + *size = target_->size() - old_size; + return true; +} + +void StringOutputStream::BackUp(int count) { + GOOGLE_CHECK_GE(count, 0); + GOOGLE_CHECK_LE(count, target_->size()); + target_->resize(target_->size() - count); +} + +int64 StringOutputStream::ByteCount() const { + return target_->size(); +} + +// =================================================================== + + +// =================================================================== + +CopyingInputStream::~CopyingInputStream() {} + +int CopyingInputStream::Skip(int count) { + char junk[4096]; + int skipped = 0; + while (skipped < count) { + int bytes = Read(junk, min(count, implicit_cast<int>(sizeof(junk)))); + if (bytes <= 0) { + // EOF or read error. + return skipped; + } + skipped += bytes; + } + return skipped; +} + +CopyingInputStreamAdaptor::CopyingInputStreamAdaptor( + CopyingInputStream* copying_stream, int block_size) + : copying_stream_(copying_stream), + owns_copying_stream_(false), + failed_(false), + position_(0), + buffer_size_(block_size > 0 ? block_size : kDefaultBlockSize), + buffer_used_(0), + backup_bytes_(0) { +} + +CopyingInputStreamAdaptor::~CopyingInputStreamAdaptor() { + if (owns_copying_stream_) { + delete copying_stream_; + } +} + +bool CopyingInputStreamAdaptor::Next(const void** data, int* size) { + if (failed_) { + // Already failed on a previous read. + return false; + } + + AllocateBufferIfNeeded(); + + if (backup_bytes_ > 0) { + // We have data left over from a previous BackUp(), so just return that. + *data = buffer_.get() + buffer_used_ - backup_bytes_; + *size = backup_bytes_; + backup_bytes_ = 0; + return true; + } + + // Read new data into the buffer. + buffer_used_ = copying_stream_->Read(buffer_.get(), buffer_size_); + if (buffer_used_ <= 0) { + // EOF or read error. We don't need the buffer anymore. + if (buffer_used_ < 0) { + // Read error (not EOF). + failed_ = true; + } + FreeBuffer(); + return false; + } + position_ += buffer_used_; + + *size = buffer_used_; + *data = buffer_.get(); + return true; +} + +void CopyingInputStreamAdaptor::BackUp(int count) { + GOOGLE_CHECK(backup_bytes_ == 0 && buffer_.get() != NULL) + << " BackUp() can only be called after Next()."; + GOOGLE_CHECK_LE(count, buffer_used_) + << " Can't back up over more bytes than were returned by the last call" + " to Next()."; + GOOGLE_CHECK_GE(count, 0) + << " Parameter to BackUp() can't be negative."; + + backup_bytes_ = count; +} + +bool CopyingInputStreamAdaptor::Skip(int count) { + GOOGLE_CHECK_GE(count, 0); + + if (failed_) { + // Already failed on a previous read. + return false; + } + + // First skip any bytes left over from a previous BackUp(). + if (backup_bytes_ >= count) { + // We have more data left over than we're trying to skip. Just chop it. + backup_bytes_ -= count; + return true; + } + + count -= backup_bytes_; + backup_bytes_ = 0; + + int skipped = copying_stream_->Skip(count); + position_ += skipped; + return skipped == count; +} + +int64 CopyingInputStreamAdaptor::ByteCount() const { + return position_ - backup_bytes_; +} + +void CopyingInputStreamAdaptor::AllocateBufferIfNeeded() { + if (buffer_.get() == NULL) { + buffer_.reset(new uint8[buffer_size_]); + } +} + +void CopyingInputStreamAdaptor::FreeBuffer() { + GOOGLE_CHECK_EQ(backup_bytes_, 0); + buffer_used_ = 0; + buffer_.reset(); +} + +// =================================================================== + +CopyingOutputStream::~CopyingOutputStream() {} + +CopyingOutputStreamAdaptor::CopyingOutputStreamAdaptor( + CopyingOutputStream* copying_stream, int block_size) + : copying_stream_(copying_stream), + owns_copying_stream_(false), + failed_(false), + position_(0), + buffer_size_(block_size > 0 ? block_size : kDefaultBlockSize), + buffer_used_(0) { +} + +CopyingOutputStreamAdaptor::~CopyingOutputStreamAdaptor() { + WriteBuffer(); + if (owns_copying_stream_) { + delete copying_stream_; + } +} + +bool CopyingOutputStreamAdaptor::Flush() { + return WriteBuffer(); +} + +bool CopyingOutputStreamAdaptor::Next(void** data, int* size) { + if (buffer_used_ == buffer_size_) { + if (!WriteBuffer()) return false; + } + + AllocateBufferIfNeeded(); + + *data = buffer_.get() + buffer_used_; + *size = buffer_size_ - buffer_used_; + buffer_used_ = buffer_size_; + return true; +} + +void CopyingOutputStreamAdaptor::BackUp(int count) { + GOOGLE_CHECK_GE(count, 0); + GOOGLE_CHECK_EQ(buffer_used_, buffer_size_) + << " BackUp() can only be called after Next()."; + GOOGLE_CHECK_LE(count, buffer_used_) + << " Can't back up over more bytes than were returned by the last call" + " to Next()."; + + buffer_used_ -= count; +} + +int64 CopyingOutputStreamAdaptor::ByteCount() const { + return position_ + buffer_used_; +} + +bool CopyingOutputStreamAdaptor::WriteBuffer() { + if (failed_) { + // Already failed on a previous write. + return false; + } + + if (buffer_used_ == 0) return true; + + if (copying_stream_->Write(buffer_.get(), buffer_used_)) { + position_ += buffer_used_; + buffer_used_ = 0; + return true; + } else { + failed_ = true; + FreeBuffer(); + return false; + } +} + +void CopyingOutputStreamAdaptor::AllocateBufferIfNeeded() { + if (buffer_ == NULL) { + buffer_.reset(new uint8[buffer_size_]); + } +} + +void CopyingOutputStreamAdaptor::FreeBuffer() { + buffer_used_ = 0; + buffer_.reset(); +} + +// =================================================================== + +FileInputStream::FileInputStream(int file_descriptor, int block_size) + : copying_input_(file_descriptor), + impl_(©ing_input_, block_size) { +} + +FileInputStream::~FileInputStream() {} + +bool FileInputStream::Close() { + return copying_input_.Close(); +} + +bool FileInputStream::Next(const void** data, int* size) { + return impl_.Next(data, size); +} + +void FileInputStream::BackUp(int count) { + impl_.BackUp(count); +} + +bool FileInputStream::Skip(int count) { + return impl_.Skip(count); +} + +int64 FileInputStream::ByteCount() const { + return impl_.ByteCount(); +} + +FileInputStream::CopyingFileInputStream::CopyingFileInputStream( + int file_descriptor) + : file_(file_descriptor), + close_on_delete_(false), + is_closed_(false), + errno_(0), + previous_seek_failed_(false) { +} + +FileInputStream::CopyingFileInputStream::~CopyingFileInputStream() { + if (close_on_delete_) { + if (!Close()) { + GOOGLE_LOG(ERROR) << "close() failed: " << strerror(errno_); + } + } +} + +bool FileInputStream::CopyingFileInputStream::Close() { + GOOGLE_CHECK(!is_closed_); + + is_closed_ = true; + if (close_no_eintr(file_) != 0) { + // The docs on close() do not specify whether a file descriptor is still + // open after close() fails with EIO. However, the glibc source code + // seems to indicate that it is not. + errno_ = errno; + return false; + } + + return true; +} + +int FileInputStream::CopyingFileInputStream::Read(void* buffer, int size) { + GOOGLE_CHECK(!is_closed_); + + int result; + do { + result = read(file_, buffer, size); + } while (result < 0 && errno == EINTR); + + if (result < 0) { + // Read error (not EOF). + errno_ = errno; + } + + return result; +} + +int FileInputStream::CopyingFileInputStream::Skip(int count) { + GOOGLE_CHECK(!is_closed_); + + if (!previous_seek_failed_ && + lseek(file_, count, SEEK_CUR) != (off_t)-1) { + // Seek succeeded. + return count; + } else { + // Failed to seek. + + // Note to self: Don't seek again. This file descriptor doesn't + // support it. + previous_seek_failed_ = true; + + // Use the default implementation. + return CopyingInputStream::Skip(count); + } +} + +// =================================================================== + +FileOutputStream::FileOutputStream(int file_descriptor, int block_size) + : copying_output_(file_descriptor), + impl_(©ing_output_, block_size) { +} + +FileOutputStream::~FileOutputStream() { + impl_.Flush(); +} + +bool FileOutputStream::Close() { + bool flush_succeeded = impl_.Flush(); + return copying_output_.Close() && flush_succeeded; +} + +bool FileOutputStream::Next(void** data, int* size) { + return impl_.Next(data, size); +} + +void FileOutputStream::BackUp(int count) { + impl_.BackUp(count); +} + +int64 FileOutputStream::ByteCount() const { + return impl_.ByteCount(); +} + +FileOutputStream::CopyingFileOutputStream::CopyingFileOutputStream( + int file_descriptor) + : file_(file_descriptor), + close_on_delete_(false), + is_closed_(false), + errno_(0) { +} + +FileOutputStream::CopyingFileOutputStream::~CopyingFileOutputStream() { + if (close_on_delete_) { + if (!Close()) { + GOOGLE_LOG(ERROR) << "close() failed: " << strerror(errno_); + } + } +} + +bool FileOutputStream::CopyingFileOutputStream::Close() { + GOOGLE_CHECK(!is_closed_); + + is_closed_ = true; + if (close_no_eintr(file_) != 0) { + // The docs on close() do not specify whether a file descriptor is still + // open after close() fails with EIO. However, the glibc source code + // seems to indicate that it is not. + errno_ = errno; + return false; + } + + return true; +} + +bool FileOutputStream::CopyingFileOutputStream::Write( + const void* buffer, int size) { + GOOGLE_CHECK(!is_closed_); + int total_written = 0; + + const uint8* buffer_base = reinterpret_cast<const uint8*>(buffer); + + while (total_written < size) { + int bytes; + do { + bytes = write(file_, buffer_base + total_written, size - total_written); + } while (bytes < 0 && errno == EINTR); + + if (bytes <= 0) { + // Write error. + + // FIXME(kenton): According to the man page, if write() returns zero, + // there was no error; write() simply did not write anything. It's + // unclear under what circumstances this might happen, but presumably + // errno won't be set in this case. I am confused as to how such an + // event should be handled. For now I'm treating it as an error, since + // retrying seems like it could lead to an infinite loop. I suspect + // this never actually happens anyway. + + if (bytes < 0) { + errno_ = errno; + } + return false; + } + total_written += bytes; + } + + return true; +} + +// =================================================================== + +IstreamInputStream::IstreamInputStream(istream* input, int block_size) + : copying_input_(input), + impl_(©ing_input_, block_size) { +} + +IstreamInputStream::~IstreamInputStream() {} + +bool IstreamInputStream::Next(const void** data, int* size) { + return impl_.Next(data, size); +} + +void IstreamInputStream::BackUp(int count) { + impl_.BackUp(count); +} + +bool IstreamInputStream::Skip(int count) { + return impl_.Skip(count); +} + +int64 IstreamInputStream::ByteCount() const { + return impl_.ByteCount(); +} + +IstreamInputStream::CopyingIstreamInputStream::CopyingIstreamInputStream( + istream* input) + : input_(input) { +} + +IstreamInputStream::CopyingIstreamInputStream::~CopyingIstreamInputStream() {} + +int IstreamInputStream::CopyingIstreamInputStream::Read( + void* buffer, int size) { + input_->read(reinterpret_cast<char*>(buffer), size); + int result = input_->gcount(); + if (result == 0 && input_->fail() && !input_->eof()) { + return -1; + } + return result; +} + +// =================================================================== + +OstreamOutputStream::OstreamOutputStream(ostream* output, int block_size) + : copying_output_(output), + impl_(©ing_output_, block_size) { +} + +OstreamOutputStream::~OstreamOutputStream() { + impl_.Flush(); +} + +bool OstreamOutputStream::Next(void** data, int* size) { + return impl_.Next(data, size); +} + +void OstreamOutputStream::BackUp(int count) { + impl_.BackUp(count); +} + +int64 OstreamOutputStream::ByteCount() const { + return impl_.ByteCount(); +} + +OstreamOutputStream::CopyingOstreamOutputStream::CopyingOstreamOutputStream( + ostream* output) + : output_(output) { +} + +OstreamOutputStream::CopyingOstreamOutputStream::~CopyingOstreamOutputStream() { +} + +bool OstreamOutputStream::CopyingOstreamOutputStream::Write( + const void* buffer, int size) { + output_->write(reinterpret_cast<const char*>(buffer), size); + return output_->good(); +} + +// =================================================================== + +ConcatenatingInputStream::ConcatenatingInputStream( + ZeroCopyInputStream* const streams[], int count) + : streams_(streams), stream_count_(count), bytes_retired_(0) { +} + +ConcatenatingInputStream::~ConcatenatingInputStream() { +} + +bool ConcatenatingInputStream::Next(const void** data, int* size) { + while (stream_count_ > 0) { + if (streams_[0]->Next(data, size)) return true; + + // That stream is done. Advance to the next one. + bytes_retired_ += streams_[0]->ByteCount(); + ++streams_; + --stream_count_; + } + + // No more streams. + return false; +} + +void ConcatenatingInputStream::BackUp(int count) { + if (stream_count_ > 0) { + streams_[0]->BackUp(count); + } else { + GOOGLE_LOG(DFATAL) << "Can't BackUp() after failed Next()."; + } +} + +bool ConcatenatingInputStream::Skip(int count) { + while (stream_count_ > 0) { + // Assume that ByteCount() can be used to find out how much we actually + // skipped when Skip() fails. + int64 target_byte_count = streams_[0]->ByteCount() + count; + if (streams_[0]->Skip(count)) return true; + + // Hit the end of the stream. Figure out how many more bytes we still have + // to skip. + int64 final_byte_count = streams_[0]->ByteCount(); + GOOGLE_DCHECK_LT(final_byte_count, target_byte_count); + count = target_byte_count - final_byte_count; + + // That stream is done. Advance to the next one. + bytes_retired_ += final_byte_count; + ++streams_; + --stream_count_; + } + + return false; +} + +int64 ConcatenatingInputStream::ByteCount() const { + if (stream_count_ == 0) { + return bytes_retired_; + } else { + return bytes_retired_ + streams_[0]->ByteCount(); + } +} + + +// =================================================================== + +LimitingInputStream::LimitingInputStream(ZeroCopyInputStream* input, + int64 limit) + : input_(input), limit_(limit) {} + +LimitingInputStream::~LimitingInputStream() { + // If we overshot the limit, back up. + if (limit_ < 0) input_->BackUp(-limit_); +} + +bool LimitingInputStream::Next(const void** data, int* size) { + if (limit_ < 0) return false; + if (!input_->Next(data, size)) return false; + + limit_ -= *size; + if (limit_ < 0) { + // We overshot the limit. Reduce *size to hide the rest of the buffer. + *size += limit_; + } + return true; +} + +void LimitingInputStream::BackUp(int count) { + if (limit_ < 0) { + input_->BackUp(count - limit_); + limit_ = count; + } else { + input_->BackUp(count); + limit_ += count; + } +} + +bool LimitingInputStream::Skip(int count) { + if (count > limit_) { + if (limit_ < 0) return false; + input_->Skip(limit_); + limit_ = 0; + return false; + } else { + if (!input_->Skip(count)) return false; + limit_ -= count; + return true; + } +} + +int64 LimitingInputStream::ByteCount() const { + if (limit_ < 0) { + return input_->ByteCount() + limit_; + } else { + return input_->ByteCount(); + } +} + + +// =================================================================== + +} // namespace io +} // namespace protobuf +} // namespace google diff --git a/src/google/protobuf/io/zero_copy_stream_impl.h b/src/google/protobuf/io/zero_copy_stream_impl.h new file mode 100644 index 00000000..bd73afb7 --- /dev/null +++ b/src/google/protobuf/io/zero_copy_stream_impl.h @@ -0,0 +1,617 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. +// http://code.google.com/p/protobuf/ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// This file contains common implementations of the interfaces defined in +// zero_copy_stream.h. These implementations cover I/O on raw arrays, +// strings, and file descriptors. Of course, many users will probably +// want to write their own implementations of these interfaces specific +// to the particular I/O abstractions they prefer to use, but these +// should cover the most common cases. + +#ifndef GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_IMPL_H__ +#define GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_IMPL_H__ + +#include <string> +#include <iosfwd> +#include <google/protobuf/io/zero_copy_stream.h> +#include <google/protobuf/stubs/common.h> + + +namespace google { +namespace protobuf { +namespace io { + +// =================================================================== + +// A ZeroCopyInputStream backed by an in-memory array of bytes. +class LIBPROTOBUF_EXPORT ArrayInputStream : public ZeroCopyInputStream { + public: + // Create an InputStream that returns the bytes pointed to by "data". + // "data" remains the property of the caller but must remain valid until + // the stream is destroyed. If a block_size is given, calls to Next() + // will return data blocks no larger than the given size. Otherwise, the + // first call to Next() returns the entire array. block_size is mainly + // useful for testing; in production you would probably never want to set + // it. + ArrayInputStream(const void* data, int size, int block_size = -1); + ~ArrayInputStream(); + + // implements ZeroCopyInputStream ---------------------------------- + bool Next(const void** data, int* size); + void BackUp(int count); + bool Skip(int count); + int64 ByteCount() const; + + + private: + const uint8* const data_; // The byte array. + const int size_; // Total size of the array. + const int block_size_; // How many bytes to return at a time. + + int position_; + int last_returned_size_; // How many bytes we returned last time Next() + // was called (used for error checking only). + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ArrayInputStream); +}; + +// =================================================================== + +// A ZeroCopyOutputStream backed by an in-memory array of bytes. +class LIBPROTOBUF_EXPORT ArrayOutputStream : public ZeroCopyOutputStream { + public: + // Create an OutputStream that writes to the bytes pointed to by "data". + // "data" remains the property of the caller but must remain valid until + // the stream is destroyed. If a block_size is given, calls to Next() + // will return data blocks no larger than the given size. Otherwise, the + // first call to Next() returns the entire array. block_size is mainly + // useful for testing; in production you would probably never want to set + // it. + ArrayOutputStream(void* data, int size, int block_size = -1); + ~ArrayOutputStream(); + + // implements ZeroCopyOutputStream --------------------------------- + bool Next(void** data, int* size); + void BackUp(int count); + int64 ByteCount() const; + + private: + uint8* const data_; // The byte array. + const int size_; // Total size of the array. + const int block_size_; // How many bytes to return at a time. + + int position_; + int last_returned_size_; // How many bytes we returned last time Next() + // was called (used for error checking only). + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ArrayOutputStream); +}; + +// =================================================================== + +// A ZeroCopyOutputStream which appends bytes to a string. +class LIBPROTOBUF_EXPORT StringOutputStream : public ZeroCopyOutputStream { + public: + // Create a StringOutputStream which appends bytes to the given string. + // The string remains property of the caller, but it MUST NOT be accessed + // in any way until the stream is destroyed. + // + // Hint: If you call target->reserve(n) before creating the stream, + // the first call to Next() will return at least n bytes of buffer + // space. + explicit StringOutputStream(string* target); + ~StringOutputStream(); + + // implements ZeroCopyOutputStream --------------------------------- + bool Next(void** data, int* size); + void BackUp(int count); + int64 ByteCount() const; + + private: + static const int kMinimumSize = 16; + + string* target_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(StringOutputStream); +}; + +// Note: There is no StringInputStream. Instead, just create an +// ArrayInputStream as follows: +// ArrayInputStream input(str.data(), str.size()); + +// =================================================================== + + +// =================================================================== + +// A generic traditional input stream interface. +// +// Lots of traditional input streams (e.g. file descriptors, C stdio +// streams, and C++ iostreams) expose an interface where every read +// involves copying bytes into a buffer. If you want to take such an +// interface and make a ZeroCopyInputStream based on it, simply implement +// CopyingInputStream and then use CopyingInputStreamAdaptor. +// +// CopyingInputStream implementations should avoid buffering if possible. +// CopyingInputStreamAdaptor does its own buffering and will read data +// in large blocks. +class LIBPROTOBUF_EXPORT CopyingInputStream { + public: + virtual ~CopyingInputStream(); + + // Reads up to "size" bytes into the given buffer. Returns the number of + // bytes read. Read() waits until at least one byte is available, or + // returns zero if no bytes will ever become available (EOF), or -1 if a + // permanent read error occurred. + virtual int Read(void* buffer, int size) = 0; + + // Skips the next "count" bytes of input. Returns the number of bytes + // actually skipped. This will always be exactly equal to "count" unless + // EOF was reached or a permanent read error occurred. + // + // The default implementation just repeatedly calls Read() into a scratch + // buffer. + virtual int Skip(int count); +}; + +// A ZeroCopyInputStream which reads from a CopyingInputStream. This is +// useful for implementing ZeroCopyInputStreams that read from traditional +// streams. Note that this class is not really zero-copy. +// +// If you want to read from file descriptors or C++ istreams, this is +// already implemented for you: use FileInputStream or IstreamInputStream +// respectively. +class LIBPROTOBUF_EXPORT CopyingInputStreamAdaptor : public ZeroCopyInputStream { + public: + // Creates a stream that reads from the given CopyingInputStream. + // If a block_size is given, it specifies the number of bytes that + // should be read and returned with each call to Next(). Otherwise, + // a reasonable default is used. The caller retains ownership of + // copying_stream unless SetOwnsCopyingStream(true) is called. + explicit CopyingInputStreamAdaptor(CopyingInputStream* copying_stream, + int block_size = -1); + ~CopyingInputStreamAdaptor(); + + // Call SetOwnsCopyingStream(true) to tell the CopyingInputStreamAdaptor to + // delete the underlying CopyingInputStream when it is destroyed. + void SetOwnsCopyingStream(bool value) { owns_copying_stream_ = value; } + + // implements ZeroCopyInputStream ---------------------------------- + bool Next(const void** data, int* size); + void BackUp(int count); + bool Skip(int count); + int64 ByteCount() const; + + private: + // Insures that buffer_ is not NULL. + void AllocateBufferIfNeeded(); + // Frees the buffer and resets buffer_used_. + void FreeBuffer(); + + // The underlying copying stream. + CopyingInputStream* copying_stream_; + bool owns_copying_stream_; + + // True if we have seen a permenant error from the underlying stream. + bool failed_; + + // The current position of copying_stream_, relative to the point where + // we started reading. + int64 position_; + + // Data is read into this buffer. It may be NULL if no buffer is currently + // in use. Otherwise, it points to an array of size buffer_size_. + scoped_array<uint8> buffer_; + const int buffer_size_; + + // Number of valid bytes currently in the buffer (i.e. the size last + // returned by Next()). 0 <= buffer_used_ <= buffer_size_. + int buffer_used_; + + // Number of bytes in the buffer which were backed up over by a call to + // BackUp(). These need to be returned again. + // 0 <= backup_bytes_ <= buffer_used_ + int backup_bytes_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingInputStreamAdaptor); +}; + +// =================================================================== + +// A generic traditional output stream interface. +// +// Lots of traditional output streams (e.g. file descriptors, C stdio +// streams, and C++ iostreams) expose an interface where every write +// involves copying bytes from a buffer. If you want to take such an +// interface and make a ZeroCopyOutputStream based on it, simply implement +// CopyingOutputStream and then use CopyingOutputStreamAdaptor. +// +// CopyingOutputStream implementations should avoid buffering if possible. +// CopyingOutputStreamAdaptor does its own buffering and will write data +// in large blocks. +class LIBPROTOBUF_EXPORT CopyingOutputStream { + public: + virtual ~CopyingOutputStream(); + + // Writes "size" bytes from the given buffer to the output. Returns true + // if successful, false on a write error. + virtual bool Write(const void* buffer, int size) = 0; +}; + +// A ZeroCopyOutputStream which writes to a CopyingOutputStream. This is +// useful for implementing ZeroCopyOutputStreams that write to traditional +// streams. Note that this class is not really zero-copy. +// +// If you want to write to file descriptors or C++ ostreams, this is +// already implemented for you: use FileOutputStream or OstreamOutputStream +// respectively. +class LIBPROTOBUF_EXPORT CopyingOutputStreamAdaptor : public ZeroCopyOutputStream { + public: + // Creates a stream that writes to the given Unix file descriptor. + // If a block_size is given, it specifies the size of the buffers + // that should be returned by Next(). Otherwise, a reasonable default + // is used. + explicit CopyingOutputStreamAdaptor(CopyingOutputStream* copying_stream, + int block_size = -1); + ~CopyingOutputStreamAdaptor(); + + // Writes all pending data to the underlying stream. Returns false if a + // write error occurred on the underlying stream. (The underlying + // stream itself is not necessarily flushed.) + bool Flush(); + + // Call SetOwnsCopyingStream(true) to tell the CopyingOutputStreamAdaptor to + // delete the underlying CopyingOutputStream when it is destroyed. + void SetOwnsCopyingStream(bool value) { owns_copying_stream_ = value; } + + // implements ZeroCopyOutputStream --------------------------------- + bool Next(void** data, int* size); + void BackUp(int count); + int64 ByteCount() const; + + private: + // Write the current buffer, if it is present. + bool WriteBuffer(); + // Insures that buffer_ is not NULL. + void AllocateBufferIfNeeded(); + // Frees the buffer. + void FreeBuffer(); + + // The underlying copying stream. + CopyingOutputStream* copying_stream_; + bool owns_copying_stream_; + + // True if we have seen a permenant error from the underlying stream. + bool failed_; + + // The current position of copying_stream_, relative to the point where + // we started writing. + int64 position_; + + // Data is written from this buffer. It may be NULL if no buffer is + // currently in use. Otherwise, it points to an array of size buffer_size_. + scoped_array<uint8> buffer_; + const int buffer_size_; + + // Number of valid bytes currently in the buffer (i.e. the size last + // returned by Next()). When BackUp() is called, we just reduce this. + // 0 <= buffer_used_ <= buffer_size_. + int buffer_used_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingOutputStreamAdaptor); +}; + +// =================================================================== + +// A ZeroCopyInputStream which reads from a file descriptor. +// +// FileInputStream is preferred over using an ifstream with IstreamInputStream. +// The latter will introduce an extra layer of buffering, harming performance. +// Also, it's conceivable that FileInputStream could someday be enhanced +// to use zero-copy file descriptors on OSs which support them. +class LIBPROTOBUF_EXPORT FileInputStream : public ZeroCopyInputStream { + public: + // Creates a stream that reads from the given Unix file descriptor. + // If a block_size is given, it specifies the number of bytes that + // should be read and returned with each call to Next(). Otherwise, + // a reasonable default is used. + explicit FileInputStream(int file_descriptor, int block_size = -1); + ~FileInputStream(); + + // Flushes any buffers and closes the underlying file. Returns false if + // an error occurs during the process; use GetErrno() to examine the error. + // Even if an error occurs, the file descriptor is closed when this returns. + bool Close(); + + // By default, the file descriptor is not closed when the stream is + // destroyed. Call SetCloseOnDelete(true) to change that. WARNING: + // This leaves no way for the caller to detect if close() fails. If + // detecting close() errors is important to you, you should arrange + // to close the descriptor yourself. + void SetCloseOnDelete(bool value) { copying_input_.SetCloseOnDelete(value); } + + // If an I/O error has occurred on this file descriptor, this is the + // errno from that error. Otherwise, this is zero. Once an error + // occurs, the stream is broken and all subsequent operations will + // fail. + int GetErrno() { return copying_input_.GetErrno(); } + + // implements ZeroCopyInputStream ---------------------------------- + bool Next(const void** data, int* size); + void BackUp(int count); + bool Skip(int count); + int64 ByteCount() const; + + private: + class LIBPROTOBUF_EXPORT CopyingFileInputStream : public CopyingInputStream { + public: + CopyingFileInputStream(int file_descriptor); + ~CopyingFileInputStream(); + + bool Close(); + void SetCloseOnDelete(bool value) { close_on_delete_ = value; } + int GetErrno() { return errno_; } + + // implements CopyingInputStream --------------------------------- + int Read(void* buffer, int size); + int Skip(int count); + + private: + // The file descriptor. + const int file_; + bool close_on_delete_; + bool is_closed_; + + // The errno of the I/O error, if one has occurred. Otherwise, zero. + int errno_; + + // Did we try to seek once and fail? If so, we assume this file descriptor + // doesn't support seeking and won't try again. + bool previous_seek_failed_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingFileInputStream); + }; + + CopyingFileInputStream copying_input_; + CopyingInputStreamAdaptor impl_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FileInputStream); +}; + +// =================================================================== + +// A ZeroCopyOutputStream which writes to a file descriptor. +// +// FileInputStream is preferred over using an ofstream with OstreamOutputStream. +// The latter will introduce an extra layer of buffering, harming performance. +// Also, it's conceivable that FileInputStream could someday be enhanced +// to use zero-copy file descriptors on OSs which support them. +class LIBPROTOBUF_EXPORT FileOutputStream : public ZeroCopyOutputStream { + public: + // Creates a stream that writes to the given Unix file descriptor. + // If a block_size is given, it specifies the size of the buffers + // that should be returned by Next(). Otherwise, a reasonable default + // is used. + explicit FileOutputStream(int file_descriptor, int block_size = -1); + ~FileOutputStream(); + + // Flushes any buffers and closes the underlying file. Returns false if + // an error occurs during the process; use GetErrno() to examine the error. + // Even if an error occurs, the file descriptor is closed when this returns. + bool Close(); + + // By default, the file descriptor is not closed when the stream is + // destroyed. Call SetCloseOnDelete(true) to change that. WARNING: + // This leaves no way for the caller to detect if close() fails. If + // detecting close() errors is important to you, you should arrange + // to close the descriptor yourself. + void SetCloseOnDelete(bool value) { copying_output_.SetCloseOnDelete(value); } + + // If an I/O error has occurred on this file descriptor, this is the + // errno from that error. Otherwise, this is zero. Once an error + // occurs, the stream is broken and all subsequent operations will + // fail. + int GetErrno() { return copying_output_.GetErrno(); } + + // implements ZeroCopyOutputStream --------------------------------- + bool Next(void** data, int* size); + void BackUp(int count); + int64 ByteCount() const; + + private: + class LIBPROTOBUF_EXPORT CopyingFileOutputStream : public CopyingOutputStream { + public: + CopyingFileOutputStream(int file_descriptor); + ~CopyingFileOutputStream(); + + bool Close(); + void SetCloseOnDelete(bool value) { close_on_delete_ = value; } + int GetErrno() { return errno_; } + + // implements CopyingOutputStream -------------------------------- + bool Write(const void* buffer, int size); + + private: + // The file descriptor. + const int file_; + bool close_on_delete_; + bool is_closed_; + + // The errno of the I/O error, if one has occurred. Otherwise, zero. + int errno_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingFileOutputStream); + }; + + CopyingFileOutputStream copying_output_; + CopyingOutputStreamAdaptor impl_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FileOutputStream); +}; + +// =================================================================== + +// A ZeroCopyInputStream which reads from a C++ istream. +// +// Note that for reading files (or anything represented by a file descriptor), +// FileInputStream is more efficient. +class LIBPROTOBUF_EXPORT IstreamInputStream : public ZeroCopyInputStream { + public: + // Creates a stream that reads from the given C++ istream. + // If a block_size is given, it specifies the number of bytes that + // should be read and returned with each call to Next(). Otherwise, + // a reasonable default is used. + explicit IstreamInputStream(istream* stream, int block_size = -1); + ~IstreamInputStream(); + + // implements ZeroCopyInputStream ---------------------------------- + bool Next(const void** data, int* size); + void BackUp(int count); + bool Skip(int count); + int64 ByteCount() const; + + private: + class LIBPROTOBUF_EXPORT CopyingIstreamInputStream : public CopyingInputStream { + public: + CopyingIstreamInputStream(istream* input); + ~CopyingIstreamInputStream(); + + // implements CopyingInputStream --------------------------------- + int Read(void* buffer, int size); + // (We use the default implementation of Skip().) + + private: + // The stream. + istream* input_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingIstreamInputStream); + }; + + CopyingIstreamInputStream copying_input_; + CopyingInputStreamAdaptor impl_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(IstreamInputStream); +}; + +// =================================================================== + +// A ZeroCopyOutputStream which writes to a C++ ostream. +// +// Note that for writing files (or anything represented by a file descriptor), +// FileOutputStream is more efficient. +class LIBPROTOBUF_EXPORT OstreamOutputStream : public ZeroCopyOutputStream { + public: + // Creates a stream that writes to the given C++ ostream. + // If a block_size is given, it specifies the size of the buffers + // that should be returned by Next(). Otherwise, a reasonable default + // is used. + explicit OstreamOutputStream(ostream* stream, int block_size = -1); + ~OstreamOutputStream(); + + // implements ZeroCopyOutputStream --------------------------------- + bool Next(void** data, int* size); + void BackUp(int count); + int64 ByteCount() const; + + private: + class LIBPROTOBUF_EXPORT CopyingOstreamOutputStream : public CopyingOutputStream { + public: + CopyingOstreamOutputStream(ostream* output); + ~CopyingOstreamOutputStream(); + + // implements CopyingOutputStream -------------------------------- + bool Write(const void* buffer, int size); + + private: + // The stream. + ostream* output_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingOstreamOutputStream); + }; + + CopyingOstreamOutputStream copying_output_; + CopyingOutputStreamAdaptor impl_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(OstreamOutputStream); +}; + +// =================================================================== + +// A ZeroCopyInputStream which reads from several other streams in sequence. +// ConcatenatingInputStream is unable to distinguish between end-of-stream +// and read errors in the underlying streams, so it assumes any errors mean +// end-of-stream. So, if the underlying streams fail for any other reason, +// ConcatenatingInputStream may do odd things. It is suggested that you do +// not use ConcatenatingInputStream on streams that might produce read errors +// other than end-of-stream. +class LIBPROTOBUF_EXPORT ConcatenatingInputStream : public ZeroCopyInputStream { + public: + // All streams passed in as well as the array itself must remain valid + // until the ConcatenatingInputStream is destroyed. + ConcatenatingInputStream(ZeroCopyInputStream* const streams[], int count); + ~ConcatenatingInputStream(); + + // implements ZeroCopyInputStream ---------------------------------- + bool Next(const void** data, int* size); + void BackUp(int count); + bool Skip(int count); + int64 ByteCount() const; + + + private: + // As streams are retired, streams_ is incremented and count_ is + // decremented. + ZeroCopyInputStream* const* streams_; + int stream_count_; + int64 bytes_retired_; // Bytes read from previous streams. + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ConcatenatingInputStream); +}; + +// =================================================================== + +// A ZeroCopyInputStream which wraps some other stream and limits it to +// a particular byte count. +class LIBPROTOBUF_EXPORT LimitingInputStream : public ZeroCopyInputStream { + public: + LimitingInputStream(ZeroCopyInputStream* input, int64 limit); + ~LimitingInputStream(); + + // implements ZeroCopyInputStream ---------------------------------- + bool Next(const void** data, int* size); + void BackUp(int count); + bool Skip(int count); + int64 ByteCount() const; + + + private: + ZeroCopyInputStream* input_; + int64 limit_; // Decreases as we go, becomes negative if we overshoot. + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(LimitingInputStream); +}; + +// =================================================================== + +} // namespace io +} // namespace protobuf + +} // namespace google +#endif // GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_IMPL_H__ diff --git a/src/google/protobuf/io/zero_copy_stream_unittest.cc b/src/google/protobuf/io/zero_copy_stream_unittest.cc new file mode 100644 index 00000000..c618041f --- /dev/null +++ b/src/google/protobuf/io/zero_copy_stream_unittest.cc @@ -0,0 +1,443 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. +// http://code.google.com/p/protobuf/ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// Testing strategy: For each type of I/O (array, string, file, etc.) we +// create an output stream and write some data to it, then create a +// corresponding input stream to read the same data back and expect it to +// match. When the data is written, it is written in several small chunks +// of varying sizes, with a BackUp() after each chunk. It is read back +// similarly, but with chunks separated at different points. The whole +// process is run with a variety of block sizes for both the input and +// the output. +// +// TODO(kenton): Rewrite this test to bring it up to the standards of all +// the other proto2 tests. May want to wait for gTest to implement +// "parametized tests" so that one set of tests can be used on all the +// implementations. + +#ifdef _MSC_VER +#include <io.h> +#else +#include <unistd.h> +#endif +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <sstream> + +#include <google/protobuf/io/zero_copy_stream_impl.h> + +#include <google/protobuf/stubs/common.h> +#include <google/protobuf/testing/googletest.h> +#include <gtest/gtest.h> + +namespace google { +namespace protobuf { +namespace io { +namespace { + +#ifdef _WIN32 +#define pipe(fds) _pipe(fds, 4096, O_BINARY) +#endif + +#ifndef O_BINARY +#ifdef _O_BINARY +#define O_BINARY _O_BINARY +#else +#define O_BINARY 0 // If this isn't defined, the platform doesn't need it. +#endif +#endif + +class IoTest : public testing::Test { + protected: + // Test helpers. + + // Helper to write an array of data to an output stream. + bool WriteToOutput(ZeroCopyOutputStream* output, const void* data, int size); + // Helper to read a fixed-length array of data from an input stream. + int ReadFromInput(ZeroCopyInputStream* input, void* data, int size); + // Write a string to the output stream. + void WriteString(ZeroCopyOutputStream* output, const char* str); + // Read a number of bytes equal to the size of the given string and checks + // that it matches the string. + void ReadString(ZeroCopyInputStream* input, const char* str); + // Writes some text to the output stream in a particular order. Returns + // the number of bytes written, incase the caller needs that to set up an + // input stream. + int WriteStuff(ZeroCopyOutputStream* output); + // Reads text from an input stream and expects it to match what + // WriteStuff() writes. + void ReadStuff(ZeroCopyInputStream* input); + + static const int kBlockSizes[]; + static const int kBlockSizeCount; +}; + +const int IoTest::kBlockSizes[] = {-1, 1, 2, 5, 7, 10, 23, 64}; +const int IoTest::kBlockSizeCount = GOOGLE_ARRAYSIZE(IoTest::kBlockSizes); + +bool IoTest::WriteToOutput(ZeroCopyOutputStream* output, + const void* data, int size) { + const uint8* in = reinterpret_cast<const uint8*>(data); + int in_size = size; + + void* out; + int out_size; + + while (true) { + if (!output->Next(&out, &out_size)) { + return false; + } + + if (in_size <= out_size) { + memcpy(out, in, in_size); + output->BackUp(out_size - in_size); + return true; + } + + memcpy(out, in, out_size); + in += out_size; + in_size -= out_size; + } +} + +int IoTest::ReadFromInput(ZeroCopyInputStream* input, void* data, int size) { + uint8* out = reinterpret_cast<uint8*>(data); + int out_size = size; + + const void* in; + int in_size = 0; + + while (true) { + if (!input->Next(&in, &in_size)) { + return size - out_size; + } + + if (out_size <= in_size) { + memcpy(out, in, out_size); + input->BackUp(in_size - out_size); + return size; // Copied all of it. + } + + memcpy(out, in, in_size); + out += in_size; + out_size -= in_size; + } +} + +void IoTest::WriteString(ZeroCopyOutputStream* output, const char* str) { + EXPECT_TRUE(WriteToOutput(output, str, strlen(str))); +} + +void IoTest::ReadString(ZeroCopyInputStream* input, const char* str) { + int length = strlen(str); + scoped_array<char> buffer(new char[length + 1]); + buffer[length] = '\0'; + EXPECT_EQ(ReadFromInput(input, buffer.get(), length), length); + EXPECT_STREQ(str, buffer.get()); +} + +int IoTest::WriteStuff(ZeroCopyOutputStream* output) { + WriteString(output, "Hello world!\n"); + WriteString(output, "Some te"); + WriteString(output, "xt. Blah blah."); + WriteString(output, "abcdefg"); + WriteString(output, "01234567890123456789"); + WriteString(output, "foobar"); + + EXPECT_EQ(output->ByteCount(), 68); + + int result = output->ByteCount(); + return result; +} + +// Reads text from an input stream and expects it to match what WriteStuff() +// writes. +void IoTest::ReadStuff(ZeroCopyInputStream* input) { + ReadString(input, "Hello world!\n"); + ReadString(input, "Some text. "); + ReadString(input, "Blah "); + ReadString(input, "blah."); + ReadString(input, "abcdefg"); + EXPECT_TRUE(input->Skip(20)); + ReadString(input, "foo"); + ReadString(input, "bar"); + + EXPECT_EQ(input->ByteCount(), 68); + + uint8 byte; + EXPECT_EQ(ReadFromInput(input, &byte, 1), 0); +} + +// =================================================================== + +TEST_F(IoTest, ArrayIo) { + const int kBufferSize = 256; + uint8 buffer[kBufferSize]; + + for (int i = 0; i < kBlockSizeCount; i++) { + for (int j = 0; j < kBlockSizeCount; j++) { + int size; + { + ArrayOutputStream output(buffer, kBufferSize, kBlockSizes[i]); + size = WriteStuff(&output); + } + { + ArrayInputStream input(buffer, size, kBlockSizes[j]); + ReadStuff(&input); + } + } + } +} + +// There is no string input, only string output. Also, it doesn't support +// explicit block sizes. So, we'll only run one test and we'll use +// ArrayInput to read back the results. +TEST_F(IoTest, StringIo) { + string str; + { + StringOutputStream output(&str); + WriteStuff(&output); + } + { + ArrayInputStream input(str.data(), str.size()); + ReadStuff(&input); + } +} + + +// To test files, we create a temporary file, write, read, truncate, repeat. +TEST_F(IoTest, FileIo) { + string filename = TestTempDir() + "/zero_copy_stream_test_file"; + + for (int i = 0; i < kBlockSizeCount; i++) { + for (int j = 0; j < kBlockSizeCount; j++) { + // Make a temporary file. + int file = + open(filename.c_str(), O_RDWR | O_CREAT | O_TRUNC | O_BINARY, 0777); + ASSERT_GE(file, 0); + + { + FileOutputStream output(file, kBlockSizes[i]); + WriteStuff(&output); + EXPECT_EQ(0, output.GetErrno()); + } + + // Rewind. + ASSERT_NE(lseek(file, 0, SEEK_SET), (off_t)-1); + + { + FileInputStream input(file, kBlockSizes[j]); + ReadStuff(&input); + EXPECT_EQ(0, input.GetErrno()); + } + + close(file); + } + } +} + +// MSVC raises various debugging exceptions if we try to use a file +// descriptor of -1, defeating our tests below. This class will disable +// these debug assertions while in scope. +class MsvcDebugDisabler { + public: +#ifdef _MSC_VER + MsvcDebugDisabler() { + old_handler_ = _set_invalid_parameter_handler(MyHandler); + old_mode_ = _CrtSetReportMode(_CRT_ASSERT, 0); + } + ~MsvcDebugDisabler() { + old_handler_ = _set_invalid_parameter_handler(old_handler_); + old_mode_ = _CrtSetReportMode(_CRT_ASSERT, old_mode_); + } + + static void MyHandler(const wchar_t *expr, + const wchar_t *func, + const wchar_t *file, + unsigned int line, + uintptr_t pReserved) { + // do nothing + } + + _invalid_parameter_handler old_handler_; + int old_mode_; +#else + // Dummy constructor and destructor to ensure that GCC doesn't complain + // that debug_disabler is an unused variable. + MsvcDebugDisabler() {} + ~MsvcDebugDisabler() {} +#endif +}; + +// Test that FileInputStreams report errors correctly. +TEST_F(IoTest, FileReadError) { + MsvcDebugDisabler debug_disabler; + + // -1 = invalid file descriptor. + FileInputStream input(-1); + + const void* buffer; + int size; + EXPECT_FALSE(input.Next(&buffer, &size)); + EXPECT_EQ(EBADF, input.GetErrno()); +} + +// Test that FileOutputStreams report errors correctly. +TEST_F(IoTest, FileWriteError) { + MsvcDebugDisabler debug_disabler; + + // -1 = invalid file descriptor. + FileOutputStream input(-1); + + void* buffer; + int size; + + // The first call to Next() succeeds because it doesn't have anything to + // write yet. + EXPECT_TRUE(input.Next(&buffer, &size)); + + // Second call fails. + EXPECT_FALSE(input.Next(&buffer, &size)); + + EXPECT_EQ(EBADF, input.GetErrno()); +} + +// Pipes are not seekable, so File{Input,Output}Stream ends up doing some +// different things to handle them. We'll test by writing to a pipe and +// reading back from it. +TEST_F(IoTest, PipeIo) { + int files[2]; + + for (int i = 0; i < kBlockSizeCount; i++) { + for (int j = 0; j < kBlockSizeCount; j++) { + // Need to create a new pipe each time because ReadStuff() expects + // to see EOF at the end. + ASSERT_EQ(pipe(files), 0); + + { + FileOutputStream output(files[1], kBlockSizes[i]); + WriteStuff(&output); + EXPECT_EQ(0, output.GetErrno()); + } + close(files[1]); // Send EOF. + + { + FileInputStream input(files[0], kBlockSizes[j]); + ReadStuff(&input); + EXPECT_EQ(0, input.GetErrno()); + } + close(files[0]); + } + } +} + +// Test using C++ iostreams. +TEST_F(IoTest, IostreamIo) { + for (int i = 0; i < kBlockSizeCount; i++) { + for (int j = 0; j < kBlockSizeCount; j++) { + stringstream stream; + + { + OstreamOutputStream output(&stream, kBlockSizes[i]); + WriteStuff(&output); + EXPECT_FALSE(stream.fail()); + } + + { + IstreamInputStream input(&stream, kBlockSizes[j]); + ReadStuff(&input); + EXPECT_TRUE(stream.eof()); + } + } + } +} + +// To test ConcatenatingInputStream, we create several ArrayInputStreams +// covering a buffer and then concatenate them. +TEST_F(IoTest, ConcatenatingInputStream) { + const int kBufferSize = 256; + uint8 buffer[kBufferSize]; + + // Fill the buffer. + ArrayOutputStream output(buffer, kBufferSize); + WriteStuff(&output); + + // Now split it up into multiple streams of varying sizes. + ASSERT_EQ(68, output.ByteCount()); // Test depends on this. + ArrayInputStream input1(buffer , 12); + ArrayInputStream input2(buffer + 12, 7); + ArrayInputStream input3(buffer + 19, 6); + ArrayInputStream input4(buffer + 25, 15); + ArrayInputStream input5(buffer + 40, 0); + // Note: We want to make sure we have a stream boundary somewhere between + // bytes 42 and 62, which is the range that it Skip()ed by ReadStuff(). This + // tests that a bug that existed in the original code for Skip() is fixed. + ArrayInputStream input6(buffer + 40, 10); + ArrayInputStream input7(buffer + 50, 18); // Total = 68 bytes. + + ZeroCopyInputStream* streams[] = + {&input1, &input2, &input3, &input4, &input5, &input6, &input7}; + + // Create the concatenating stream and read. + ConcatenatingInputStream input(streams, GOOGLE_ARRAYSIZE(streams)); + ReadStuff(&input); +} + +// To test LimitingInputStream, we write our golden text to a buffer, then +// create an ArrayInputStream that contains the whole buffer (not just the +// bytes written), then use a LimitingInputStream to limit it just to the +// bytes written. +TEST_F(IoTest, LimitingInputStream) { + const int kBufferSize = 256; + uint8 buffer[kBufferSize]; + + // Fill the buffer. + ArrayOutputStream output(buffer, kBufferSize); + WriteStuff(&output); + + // Set up input. + ArrayInputStream array_input(buffer, kBufferSize); + LimitingInputStream input(&array_input, output.ByteCount()); + + ReadStuff(&input); +} + +// Check that a zero-size array doesn't confuse the code. +TEST(ZeroSizeArray, Input) { + ArrayInputStream input(NULL, 0); + const void* data; + int size; + EXPECT_FALSE(input.Next(&data, &size)); +} + +TEST(ZeroSizeArray, Output) { + ArrayOutputStream output(NULL, 0); + void* data; + int size; + EXPECT_FALSE(output.Next(&data, &size)); +} + +} // namespace +} // namespace io +} // namespace protobuf +} // namespace google |