aboutsummaryrefslogblamecommitdiff
path: root/src/google/protobuf/wire_format.h
blob: 06f20d2a4b8d18cdddfd245b9009fc0ee4554d2a (plain) (tree)
1
2
3
4
5
6
7
8
                                                      
                                                   

                                     


                                                                         
  








                                                                         
  










                                                                        












                                                                               
                                    



























                                                                           
                                                                    












                                                                            

                                                               








                                                                            
                             






                                                                              
                                              































































































                                                                                
                       




                                                            
                             






                                                                           
                              



































































































































                                                                                
                        

                                                     
                             


                                     
                              




































































































                                                                                        
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Author: kenton@google.com (Kenton Varda)
//         atenasio@google.com (Chris Atenasio) (ZigZag transform)
//  Based on original Protocol Buffers design by
//  Sanjay Ghemawat, Jeff Dean, and others.
//
// This header is logically internal, but is made public because it is used
// from protocol-compiler-generated code, which may reside in other components.

#ifndef GOOGLE_PROTOBUF_WIRE_FORMAT_H__
#define GOOGLE_PROTOBUF_WIRE_FORMAT_H__

#include <cstring>
#include <string>
#include <google/protobuf/message.h>
#include <google/protobuf/descriptor.h>

namespace google {

// Forward declarations of types that this header only refers to through
// pointers and references, so their full definitions are not needed here.
namespace protobuf {
namespace io {
class CodedInputStream;   // see coded_stream.h
class CodedOutputStream;  // see coded_stream.h
}  // namespace io
class UnknownFieldSet;  // see unknown_field_set.h
}  // namespace protobuf

namespace protobuf {
namespace internal {

// This class is for internal use by the protocol buffer library and by
// protocol-compiler-generated message classes.  It must not be called
// directly by clients.
//
// This class contains helpers for implementing the binary protocol buffer
// wire format.  These helpers are called primarily by generated code.  The
// class also contains reflection-based implementations of the wire format.
//
// This class is really a namespace that contains only static methods.
class LIBPROTOBUF_EXPORT WireFormat {
 public:
  // These procedures can be used to implement the methods of Message which
  // handle parsing and serialization of the protocol buffer wire format
  // using only the Reflection interface.  When you ask the protocol
  // compiler to optimize for code size rather than speed, it will implement
  // those methods in terms of these procedures.  Of course, these are much
  // slower than the specialized implementations which the protocol compiler
  // generates when told to optimize for speed.

  // Read a message in protocol buffer wire format.
  //
  // This procedure reads either to the end of the input stream or through
  // a WIRETYPE_END_GROUP tag ending the message, whichever comes first.
  // It returns false if the input is invalid.
  //
  // Required fields are NOT checked by this method.  You must call
  // IsInitialized() on the resulting message yourself.
  static bool ParseAndMergePartial(io::CodedInputStream* input,
                                   Message* message);

  // Serialize a message in protocol buffer wire format.
  //
  // Any embedded messages within the message must have their correct sizes
  // cached.  However, the top-level message need not; its size is passed as
  // a parameter to this procedure.
  //
  // Returns false iff the underlying stream returns a write error.
  static bool SerializeWithCachedSizes(
      const Message& message,
      int size, io::CodedOutputStream* output);

  // Implements Message::ByteSize() via reflection.  WARNING:  The result
  // of this method is *not* cached anywhere.  However, all embedded messages
  // will have their ByteSize() methods called, so their sizes will be cached.
  // Therefore, calling this method is sufficient to allow you to call
  // WireFormat::SerializeWithCachedSizes() on the same object.
  static int ByteSize(const Message& message);

  // -----------------------------------------------------------------
  // Helpers for dealing with unknown fields

  // Skips a field value of the given WireType.  The input should start
  // positioned immediately after the tag.  If unknown_fields is non-NULL,
  // the contents of the field will be added to it.
  static bool SkipField(io::CodedInputStream* input, uint32 tag,
                        UnknownFieldSet* unknown_fields);

  // Reads and ignores a message from the input.  If unknown_fields is non-NULL,
  // the contents will be added to it.
  static bool SkipMessage(io::CodedInputStream* input,
                          UnknownFieldSet* unknown_fields);

  // Write the contents of an UnknownFieldSet to the output.
  static bool SerializeUnknownFields(const UnknownFieldSet& unknown_fields,
                                     io::CodedOutputStream* output);

  // Same thing except for messages that have the message_set_wire_format
  // option.
  static bool SerializeUnknownMessageSetItems(
      const UnknownFieldSet& unknown_fields,
      io::CodedOutputStream* output);

  // Compute the size of the UnknownFieldSet on the wire.
  static int ComputeUnknownFieldsSize(const UnknownFieldSet& unknown_fields);

  // Same thing except for messages that have the message_set_wire_format
  // option.
  static int ComputeUnknownMessageSetItemsSize(
      const UnknownFieldSet& unknown_fields);

  // -----------------------------------------------------------------
  // Helper constants and functions related to the format.  These are
  // mostly meant for internal and generated code to use.

  // The wire format is composed of a sequence of tag/value pairs, each
  // of which contains the value of one field (or one element of a repeated
  // field).  Each tag is encoded as a varint.  The lower bits of the tag
  // identify its wire type, which specifies the format of the data to follow.
  // The rest of the bits contain the field number.  Each type of field (as
  // declared by FieldDescriptor::Type, in descriptor.h) maps to one of
  // these wire types.  Immediately following each tag is the field's value,
  // encoded in the format specified by the wire type.  Because the tag
  // identifies the encoding of this data, it is possible to skip
  // unrecognized fields for forwards compatibility.

  // Every enumerator value fits in the kTagTypeBits (3) low bits of a tag.
  enum WireType {
    WIRETYPE_VARINT           = 0,
    WIRETYPE_FIXED64          = 1,
    WIRETYPE_LENGTH_DELIMITED = 2,
    WIRETYPE_START_GROUP      = 3,
    WIRETYPE_END_GROUP        = 4,
    WIRETYPE_FIXED32          = 5,
  };

  // Returns the wire type used for the given field type.  The field type is
  // used directly as an index into kWireTypeForFieldType; no bounds check is
  // performed here.
  static inline WireType WireTypeForFieldType(FieldDescriptor::Type type) {
    return kWireTypeForFieldType[type];
  }

  // Number of bits in a tag which identify the wire type.
  static const int kTagTypeBits = 3;
  // Mask for those bits.
  static const uint32 kTagTypeMask = (1 << kTagTypeBits) - 1;

  // Helper functions for encoding and decoding tags.  (Inlined below.)
  static uint32 MakeTag(const FieldDescriptor* field);
  static uint32 MakeTag(int field_number, WireType type);
  static WireType GetTagWireType(uint32 tag);
  static int GetTagFieldNumber(uint32 tag);

  // Helper functions for converting between floats/doubles and IEEE-754
  // uint32s/uint64s so that they can be written.  (Assumes your platform
  // uses IEEE-754 floats.)
  static uint32 EncodeFloat(float value);
  static float DecodeFloat(uint32 value);
  static uint64 EncodeDouble(double value);
  static double DecodeDouble(uint64 value);

  // Helper functions for mapping signed integers to unsigned integers in
  // such a way that numbers with small magnitudes will encode to smaller
  // varints.  If you simply static_cast a negative number to an unsigned
  // number and varint-encode it, it will always take 10 bytes, defeating
  // the purpose of varint.  So, for the "sint32" and "sint64" field types,
  // we ZigZag-encode the values.
  static uint32 ZigZagEncode32(int32 n);
  static int32  ZigZagDecode32(uint32 n);
  static uint64 ZigZagEncode64(int64 n);
  static int64  ZigZagDecode64(uint64 n);

  // Parse a single field.  The input should start out positioned immediately
  // after the tag.
  static bool ParseAndMergeField(
      uint32 tag,
      const FieldDescriptor* field,        // May be NULL for unknown
      Message* message,
      io::CodedInputStream* input);

  // Serialize a single field.
  static bool SerializeFieldWithCachedSizes(
      const FieldDescriptor* field,        // Cannot be NULL
      const Message& message,
      io::CodedOutputStream* output);

  // Compute size of a single field.  If the field is a message type, this
  // will call ByteSize() for the embedded message, ensuring that it caches
  // its size.
  static int FieldByteSize(
      const FieldDescriptor* field,        // Cannot be NULL
      const Message& message);

  // =================================================================
  // Methods for reading/writing individual fields.  The implementations
  // of these methods are defined in wire_format_inl.h; you must #include
  // that file to use these.

// Avoid ugly line wrapping.  Until the matching #undefs further down, the
// bare words `input`, `output` and `field_number` expand to complete
// parameter declarations and `INL` expands to GOOGLE_ATTRIBUTE_ALWAYS_INLINE,
// so each declaration in this section fits on a single line.
#define input  io::CodedInputStream*  input
#define output io::CodedOutputStream* output
#define field_number int field_number
#define INL GOOGLE_ATTRIBUTE_ALWAYS_INLINE

  // Read fields, not including tags.  The assumption is that you already
  // read the tag to determine what field to read.
  static inline bool ReadInt32   (input,  int32* value);
  static inline bool ReadInt64   (input,  int64* value);
  static inline bool ReadUInt32  (input, uint32* value);
  static inline bool ReadUInt64  (input, uint64* value);
  static inline bool ReadSInt32  (input,  int32* value);
  static inline bool ReadSInt64  (input,  int64* value);
  static inline bool ReadFixed32 (input, uint32* value);
  static inline bool ReadFixed64 (input, uint64* value);
  static inline bool ReadSFixed32(input,  int32* value);
  static inline bool ReadSFixed64(input,  int64* value);
  static inline bool ReadFloat   (input,  float* value);
  static inline bool ReadDouble  (input, double* value);
  static inline bool ReadBool    (input,   bool* value);
  static inline bool ReadEnum    (input,    int* value);

  static inline bool ReadString(input, string* value);
  static inline bool ReadBytes (input, string* value);

  static inline bool ReadGroup  (field_number, input, Message* value);
  static inline bool ReadMessage(input, Message* value);

  // Like above, but de-virtualize the call to MergePartialFromCodedStream().
  // The pointer must point at an instance of MessageType, *not* a subclass (or
  // the subclass must not override MergePartialFromCodedStream()).
  template<typename MessageType>
  static inline bool ReadGroupNoVirtual(field_number, input,
                                        MessageType* value);
  template<typename MessageType>
  static inline bool ReadMessageNoVirtual(input, MessageType* value);

  // Write a tag.  The Write*() functions automatically include the tag, so
  // normally there's no need to call this.
  static inline bool WriteTag(field_number, WireType type, output) INL;

  // Write fields, including tags.
  static inline bool WriteInt32   (field_number,  int32 value, output) INL;
  static inline bool WriteInt64   (field_number,  int64 value, output) INL;
  static inline bool WriteUInt32  (field_number, uint32 value, output) INL;
  static inline bool WriteUInt64  (field_number, uint64 value, output) INL;
  static inline bool WriteSInt32  (field_number,  int32 value, output) INL;
  static inline bool WriteSInt64  (field_number,  int64 value, output) INL;
  static inline bool WriteFixed32 (field_number, uint32 value, output) INL;
  static inline bool WriteFixed64 (field_number, uint64 value, output) INL;
  static inline bool WriteSFixed32(field_number,  int32 value, output) INL;
  static inline bool WriteSFixed64(field_number,  int64 value, output) INL;
  static inline bool WriteFloat   (field_number,  float value, output) INL;
  static inline bool WriteDouble  (field_number, double value, output) INL;
  static inline bool WriteBool    (field_number,   bool value, output) INL;
  static inline bool WriteEnum    (field_number,    int value, output) INL;

  static inline bool WriteString(field_number, const string& value, output) INL;
  static inline bool WriteBytes (field_number, const string& value, output) INL;

  static inline bool WriteGroup(field_number, const Message& value, output) INL;
  static inline bool WriteMessage(
    field_number, const Message& value, output) INL;

  // Like above, but de-virtualize the call to SerializeWithCachedSizes().  The
  // pointer must point at an instance of MessageType, *not* a subclass (or
  // the subclass must not override SerializeWithCachedSizes()).
  template<typename MessageType>
  static inline bool WriteGroupNoVirtual(
    field_number, const MessageType& value, output) INL;
  template<typename MessageType>
  static inline bool WriteMessageNoVirtual(
    field_number, const MessageType& value, output) INL;

  // Compute the byte size of a tag.  For groups, this includes both the start
  // and end tags.
  static inline int TagSize(field_number, FieldDescriptor::Type type);

  // Compute the byte size of a field.  The XxSize() functions do NOT include
  // the tag, so you must also call TagSize().  (This is because, for repeated
  // fields, you should only call TagSize() once and multiply it by the element
  // count, but you may have to call XxSize() for each individual element.)
  static inline int Int32Size   ( int32 value);
  static inline int Int64Size   ( int64 value);
  static inline int UInt32Size  (uint32 value);
  static inline int UInt64Size  (uint64 value);
  static inline int SInt32Size  ( int32 value);
  static inline int SInt64Size  ( int64 value);
  static inline int EnumSize    (   int value);

  // These types always have the same size.
  static const int kFixed32Size  = 4;
  static const int kFixed64Size  = 8;
  static const int kSFixed32Size = 4;
  static const int kSFixed64Size = 8;
  static const int kFloatSize    = 4;
  static const int kDoubleSize   = 8;
  static const int kBoolSize     = 1;

  static inline int StringSize(const string& value);
  static inline int BytesSize (const string& value);

  static inline int GroupSize  (const Message& value);
  static inline int MessageSize(const Message& value);

  // Like above, but de-virtualize the call to ByteSize().  The
  // pointer must point at an instance of MessageType, *not* a subclass (or
  // the subclass must not override ByteSize()).
  template<typename MessageType>
  static inline int GroupSizeNoVirtual  (const MessageType& value);
  template<typename MessageType>
  static inline int MessageSizeNoVirtual(const MessageType& value);

// End of the section that uses the parameter shorthand macros; from here on
// `input`, `output`, `field_number` and `INL` are ordinary identifiers again.
#undef input
#undef output
#undef field_number
#undef INL

 private:
  // Lookup table indexed by FieldDescriptor::Type (see WireTypeForFieldType()).
  // Only declared here; the definition and its size live elsewhere.
  static const WireType kWireTypeForFieldType[];

  // Parse/serialize a MessageSet::Item group.  Used with messages that use
  // option message_set_wire_format = true.
  static bool ParseAndMergeMessageSetItem(
      io::CodedInputStream* input,
      Message* message);
  static bool SerializeMessageSetItemWithCachedSizes(
      const FieldDescriptor* field,
      const Message& message,
      io::CodedOutputStream* output);
  static int MessageSetItemByteSize(
      const FieldDescriptor* field,
      const Message& message);

  // NOTE(review): macro defined outside this file; presumably it prevents
  // construction/copying of this static-only class -- confirm.
  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(WireFormat);
};

// inline methods ====================================================

// This macro does the same thing as WireFormat::MakeTag(), but the
// result is usable as a compile-time constant, which makes it usable
// as a switch case or a template input.  WireFormat::MakeTag() is more
// type-safe, though, so prefer it if possible.
//
// FIELD_NUMBER is converted to uint32 *before* it is shifted.  Shifting a
// signed 32-bit int left by kTagTypeBits (3) overflows once the field number
// reaches 1 << 28, which is undefined behavior and also stops the expression
// from being a valid constant expression.  The unsigned shift is well defined
// and yields the same bit pattern for every value that did not overflow.
#define GOOGLE_PROTOBUF_WIRE_FORMAT_MAKE_TAG(FIELD_NUMBER, TYPE)             \
  static_cast<uint32>(                                                       \
    (static_cast<uint32>(FIELD_NUMBER)                                       \
        << ::google::protobuf::internal::WireFormat::kTagTypeBits) | (TYPE))

// Builds the tag for a field descriptor: the wire type is looked up from the
// field's declared type and combined with the field's number.
inline uint32 WireFormat::MakeTag(const FieldDescriptor* field) {
  const WireType wire_type = WireTypeForFieldType(field->type());
  return MakeTag(field->number(), wire_type);
}

// Builds a tag from an explicit field number and wire type.  The packing
// itself is delegated to GOOGLE_PROTOBUF_WIRE_FORMAT_MAKE_TAG so that the
// run-time and compile-time forms always agree.
inline uint32 WireFormat::MakeTag(int field_number, WireType type) {
  const uint32 tag = GOOGLE_PROTOBUF_WIRE_FORMAT_MAKE_TAG(field_number, type);
  return tag;
}

// Extracts the wire type from a tag: it is stored in the low kTagTypeBits
// bits, which kTagTypeMask selects.
inline WireFormat::WireType WireFormat::GetTagWireType(uint32 tag) {
  const uint32 type_bits = tag & kTagTypeMask;
  return static_cast<WireType>(type_bits);
}

// Extracts the field number from a tag by discarding the low kTagTypeBits
// bits, which hold the wire type.
inline int WireFormat::GetTagFieldNumber(uint32 tag) {
  const uint32 number_bits = tag >> kTagTypeBits;
  return static_cast<int>(number_bits);
}

// Returns the object representation of |value| as a uint32, without any
// numeric conversion.
//
// The bytes are copied with memcpy rather than read back through a union:
// reading a union member other than the one last written is undefined
// behavior in C++, whereas memcpy between same-sized objects is well defined.
// Assumes float and uint32 have the same size.
inline uint32 WireFormat::EncodeFloat(float value) {
  uint32 bits;
  std::memcpy(&bits, &value, sizeof(bits));
  return bits;
}

// Reinterprets the bits of |value| as a float, without any numeric
// conversion.
//
// The bytes are copied with memcpy rather than read back through a union:
// reading a union member other than the one last written is undefined
// behavior in C++, whereas memcpy between same-sized objects is well defined.
// Assumes float and uint32 have the same size.
inline float WireFormat::DecodeFloat(uint32 value) {
  float result;
  std::memcpy(&result, &value, sizeof(result));
  return result;
}

// Returns the object representation of |value| as a uint64, without any
// numeric conversion.
//
// The bytes are copied with memcpy rather than read back through a union:
// reading a union member other than the one last written is undefined
// behavior in C++, whereas memcpy between same-sized objects is well defined.
// Assumes double and uint64 have the same size.
inline uint64 WireFormat::EncodeDouble(double value) {
  uint64 bits;
  std::memcpy(&bits, &value, sizeof(bits));
  return bits;
}

// Reinterprets the bits of |value| as a double, without any numeric
// conversion.
//
// The bytes are copied with memcpy rather than read back through a union:
// reading a union member other than the one last written is undefined
// behavior in C++, whereas memcpy between same-sized objects is well defined.
// Assumes double and uint64 have the same size.
inline double WireFormat::DecodeDouble(uint64 value) {
  double result;
  std::memcpy(&result, &value, sizeof(result));
  return result;
}

// ZigZag Transform:  Encodes signed integers so that they can be
// effectively used with varint encoding.
//
// varint operates on unsigned integers, encoding smaller numbers into
// fewer bytes.  If you try to use it on a signed integer, it will treat
// this number as a very large unsigned integer, which means that even
// small signed numbers like -1 will take the maximum number of bytes
// (10) to encode.  ZigZagEncode() maps signed integers to unsigned
// in such a way that those with a small absolute value will have smaller
// encoded values, making them appropriate for encoding using varint.
//
//       int32 ->     uint32
// -------------------------
//           0 ->          0
//          -1 ->          1
//           1 ->          2
//          -2 ->          3
//         ... ->        ...
//  2147483647 -> 4294967294
// -2147483648 -> 4294967295
//
//        >> encode >>
//        << decode <<

// ZigZag-encodes a signed 32-bit value: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
// so that values of small magnitude become small unsigned values.
inline uint32 WireFormat::ZigZagEncode32(int32 n) {
  // Note:  the right-shift must be arithmetic, so it is done on the signed
  // value; it yields all-ones for negative n and zero otherwise.
  // Note:  the left-shift is done on the unsigned representation, because
  // left-shifting a negative or large signed value overflows, which is
  // undefined behavior.  The resulting bit pattern is the same.
  return (static_cast<uint32>(n) << 1) ^ static_cast<uint32>(n >> 31);
}

// Inverse of ZigZagEncode32(): the low bit of |n| selects the sign and the
// remaining bits carry the magnitude.
inline int32 WireFormat::ZigZagDecode32(uint32 n) {
  const uint32 magnitude = n >> 1;
  // 0 when the low bit is clear, -1 (all bits set) when it is set.
  const int32 sign_mask = -static_cast<int32>(n & 1);
  return magnitude ^ sign_mask;
}

// ZigZag-encodes a signed 64-bit value: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
// so that values of small magnitude become small unsigned values.
inline uint64 WireFormat::ZigZagEncode64(int64 n) {
  // Note:  the right-shift must be arithmetic, so it is done on the signed
  // value; it yields all-ones for negative n and zero otherwise.
  // Note:  the left-shift is done on the unsigned representation, because
  // left-shifting a negative or large signed value overflows, which is
  // undefined behavior.  The resulting bit pattern is the same.
  return (static_cast<uint64>(n) << 1) ^ static_cast<uint64>(n >> 63);
}

// Inverse of ZigZagEncode64(): the low bit of |n| selects the sign and the
// remaining bits carry the magnitude.
inline int64 WireFormat::ZigZagDecode64(uint64 n) {
  const uint64 magnitude = n >> 1;
  // 0 when the low bit is clear, -1 (all bits set) when it is set.
  const int64 sign_mask = -static_cast<int64>(n & 1);
  return magnitude ^ sign_mask;
}

}  // namespace internal
}  // namespace protobuf

}  // namespace google
#endif  // GOOGLE_PROTOBUF_WIRE_FORMAT_H__