diff options
Diffstat (limited to 'python/google/protobuf/internal/input_stream.py')
-rwxr-xr-x | python/google/protobuf/internal/input_stream.py | 211 |
1 files changed, 211 insertions, 0 deletions
diff --git a/python/google/protobuf/internal/input_stream.py b/python/google/protobuf/internal/input_stream.py new file mode 100755 index 00000000..9f3b0f5a --- /dev/null +++ b/python/google/protobuf/internal/input_stream.py @@ -0,0 +1,211 @@ +# Protocol Buffers - Google's data interchange format +# Copyright 2008 Google Inc. +# http://code.google.com/p/protobuf/ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""InputStream is the primitive interface for reading bits from the wire. + +All protocol buffer deserialization can be expressed in terms of +the InputStream primitives provided here. +""" + +__author__ = 'robinson@google.com (Will Robinson)' + +import struct +from google.protobuf import message +from google.protobuf.internal import wire_format + + +# Note that much of this code is ported from //net/proto/ProtocolBuffer, and +# that the interface is strongly inspired by CodedInputStream from the C++ +# proto2 implementation. + + +class InputStream(object): + + """Contains all logic for reading bits, and dealing with stream position. + + If an InputStream method ever raises an exception, the stream is left + in an indeterminate state and is not safe for further use. + """ + + def __init__(self, s): + # What we really want is something like array('B', s), where elements we + # read from the array are already given to us as one-byte integers. BUT + # using array() instead of buffer() would force full string copies to result + # from each GetSubBuffer() call. + # + # So, if the N serialized bytes of a single protocol buffer object are + # split evenly between 2 child messages, and so on recursively, using + # array('B', s) instead of buffer() would incur an additional N*logN bytes + # copied during deserialization. + # + # The higher constant overhead of having to ord() for every byte we read + # from the buffer in _ReadVarintHelper() could definitely lead to worse + # performance in many real-world scenarios, even if the asymptotic + # complexity is better. However, our real answer is that the mythical + # Python/C extension module output mode for the protocol compiler will + # be blazing-fast and will eliminate most use of this class anyway. + self._buffer = buffer(s) + self._pos = 0 + + def EndOfStream(self): + """Returns true iff we're at the end of the stream. + If this returns true, then a call to any other InputStream method + will raise an exception. + """ + return self._pos >= len(self._buffer) + + def Position(self): + """Returns the current position in the stream, or equivalently, the + number of bytes read so far. + """ + return self._pos + + def GetSubBuffer(self, size=None): + """Returns a sequence-like object that represents a portion of our + underlying sequence. + + Position 0 in the returned object corresponds to self.Position() + in this stream. + + If size is specified, then the returned object ends after the + next "size" bytes in this stream. If size is not specified, + then the returned object ends at the end of this stream. + + We guarantee that the returned object R supports the Python buffer + interface (and thus that the call buffer(R) will work). + + Note that the returned buffer is read-only. + + The intended use for this method is for nested-message and nested-group + deserialization, where we want to make a recursive MergeFromString() + call on the portion of the original sequence that contains the serialized + nested message. (And we'd like to do so without making unnecessary string + copies). + + REQUIRES: size is nonnegative. + """ + # Note that buffer() doesn't perform any actual string copy. + if size is None: + return buffer(self._buffer, self._pos) + else: + if size < 0: + raise message.DecodeError('Negative size %d' % size) + return buffer(self._buffer, self._pos, size) + + def SkipBytes(self, num_bytes): + """Skip num_bytes bytes ahead, or go to the end of the stream, whichever + comes first. + + REQUIRES: num_bytes is nonnegative. + """ + if num_bytes < 0: + raise message.DecodeError('Negative num_bytes %d' % num_bytes) + self._pos += num_bytes + self._pos = min(self._pos, len(self._buffer)) + + def ReadString(self, size): + """Reads up to 'size' bytes from the stream, stopping early + only if we reach the end of the stream. Returns the bytes read + as a string. + """ + if size < 0: + raise message.DecodeError('Negative size %d' % size) + s = (self._buffer[self._pos : self._pos + size]) + self._pos += len(s) # Only advance by the number of bytes actually read. + return s + + def ReadLittleEndian32(self): + """Interprets the next 4 bytes of the stream as a little-endian + encoded, unsiged 32-bit integer, and returns that integer. + """ + try: + i = struct.unpack(wire_format.FORMAT_UINT32_LITTLE_ENDIAN, + self._buffer[self._pos : self._pos + 4]) + self._pos += 4 + return i[0] # unpack() result is a 1-element tuple. + except struct.error, e: + raise message.DecodeError(e) + + def ReadLittleEndian64(self): + """Interprets the next 8 bytes of the stream as a little-endian + encoded, unsiged 64-bit integer, and returns that integer. + """ + try: + i = struct.unpack(wire_format.FORMAT_UINT64_LITTLE_ENDIAN, + self._buffer[self._pos : self._pos + 8]) + self._pos += 8 + return i[0] # unpack() result is a 1-element tuple. + except struct.error, e: + raise message.DecodeError(e) + + def ReadVarint32(self): + """Reads a varint from the stream, interprets this varint + as a signed, 32-bit integer, and returns the integer. + """ + i = self.ReadVarint64() + if not wire_format.INT32_MIN <= i <= wire_format.INT32_MAX: + raise message.DecodeError('Value out of range for int32: %d' % i) + return int(i) + + def ReadVarUInt32(self): + """Reads a varint from the stream, interprets this varint + as an unsigned, 32-bit integer, and returns the integer. + """ + i = self.ReadVarUInt64() + if i > wire_format.UINT32_MAX: + raise message.DecodeError('Value out of range for uint32: %d' % i) + return i + + def ReadVarint64(self): + """Reads a varint from the stream, interprets this varint + as a signed, 64-bit integer, and returns the integer. + """ + i = self.ReadVarUInt64() + if i > wire_format.INT64_MAX: + i -= (1 << 64) + return i + + def ReadVarUInt64(self): + """Reads a varint from the stream, interprets this varint + as an unsigned, 64-bit integer, and returns the integer. + """ + i = self._ReadVarintHelper() + if not 0 <= i <= wire_format.UINT64_MAX: + raise message.DecodeError('Value out of range for uint64: %d' % i) + return i + + def _ReadVarintHelper(self): + """Helper for the various varint-reading methods above. + Reads an unsigned, varint-encoded integer from the stream and + returns this integer. + + Does no bounds checking except to ensure that we read at most as many bytes + as could possibly be present in a varint-encoded 64-bit number. + """ + result = 0 + shift = 0 + while 1: + if shift >= 64: + raise message.DecodeError('Too many bytes when decoding varint.') + try: + b = ord(self._buffer[self._pos]) + except IndexError: + raise message.DecodeError('Truncated varint.') + self._pos += 1 + result |= ((b & 0x7f) << shift) + shift += 7 + if not (b & 0x80): + return result |