# Protocol Buffers - Google's data interchange format
# Copyright 2008 Google Inc. All rights reserved.
# https://developers.google.com/protocol-buffers/
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""Contains routines for printing protocol messages in JSON format."""
__author__ = 'jieluo@google.com (Jie Luo)'
import base64
from datetime import datetime
import json
import math
import re
from google.protobuf import descriptor
_TIMESTAMPFOMAT = '%Y-%m-%dT%H:%M:%S'
_NUMBER = re.compile(u'[0-9+-][0-9e.+-]*')
_INTEGER = re.compile(u'[0-9+-]')
_INT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_INT32,
descriptor.FieldDescriptor.CPPTYPE_UINT32,
descriptor.FieldDescriptor.CPPTYPE_INT64,
descriptor.FieldDescriptor.CPPTYPE_UINT64])
_INT64_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_INT64,
descriptor.FieldDescriptor.CPPTYPE_UINT64])
_FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT,
descriptor.FieldDescriptor.CPPTYPE_DOUBLE])
if str is bytes:
_UNICODETYPE = unicode
else:
_UNICODETYPE = str
class SerializeToJsonError(Exception):
"""Thrown if serialization to JSON fails."""
class ParseError(Exception):
"""Thrown in case of parsing error."""
def MessageToJson(message, including_default_value_fields=False):
"""Converts protobuf message to JSON format.
Args:
message: The protocol buffers message instance to serialize.
including_default_value_fields: If True, singular primitive fields,
repeated fields, and map fields will always be serialized. If
False, only serialize non-empty fields. Singular message fields
and oneof fields are not affected by this option.
Returns:
A string containing the JSON formatted protocol buffer message.
"""
js = _MessageToJsonObject(message, including_default_value_fields)
return json.dumps(js, indent=2)
def _MessageToJsonObject(message, including_default_value_fields):
"""Converts message to an object according to Proto3 JSON Specification."""
message_descriptor = message.DESCRIPTOR
if _IsTimestampMessage(message_descriptor):
return _TimestampMessageToJsonObject(message)
if _IsDurationMessage(message_descriptor):
return _DurationMessageToJsonObject(message)
if _IsFieldMaskMessage(message_descriptor):
return _FieldMaskMessageToJsonObject(message)
if _IsWrapperMessage(message_descriptor):
return _WrapperMessageToJsonObject(message)
return _RegularMessageToJsonObject(message, including_default_value_fields)
def _IsMapEntry(field):
return (field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and
field.message_type.has_options and
field.message_type.GetOptions().map_entry)
def _RegularMessageToJsonObject(message, including_default_value_fields):
"""Converts normal message according to Proto3 JSON Specification."""
js = {}
fields = message.ListFields()
try:
for field, value in fields:
name = field.camelcase_name
if _IsMapEntry(field):
# Convert a map field.
js_map = {}
for key in value:
js_map[key] = _ConvertFieldToJsonObject(
field.message_type.fields_by_name['value'],
value[key], including_default_value_fields)
js[name] = js_map
elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
# Convert a repeated field.
repeated = []
for element in value:
repeated.append(_ConvertFieldToJsonObject(
field, element, including_default_value_fields))
js[name] = repeated
else:
js[name] = _ConvertFieldToJsonObject(
field, value, including_default_value_fields)
# Serialize default value if including_default_value_fields is True.
if including_default_value_fields:
message_descriptor = message.DESCRIPTOR
for field in message_descriptor.fields:
# Singular message fields and oneof fields will not be affected.
if ((field.label != descriptor.FieldDescriptor.LABEL_REPEATED and
field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE) or
field.containing_oneof):
continue
name = field.camelcase_name
if name in js:
# Skip the field which has been serailized already.
continue
if _IsMapEntry(field):
js[name] = {}
elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
js[name] = []
else:
js[name] = _ConvertFieldToJsonObject(field, field.default_value)
except ValueError as e:
raise SerializeToJsonError(
'Failed to serialize {0} field: {1}'.format(field.name, e))
return js
def _ConvertFieldToJsonObject(
field, value, including_default_value_fields=False):
"""Converts field value according to Proto3 JSON Specification."""
if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
return _MessageToJsonObject(value, including_default_value_fields)
elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
enum_value = field.enum_type.values_by_number.get(value, None)
if enum_value is not None:
return enum_value.name
else:
raise SerializeToJsonError('Enum field contains an integer value '
'which can not mapped to an enum value.')
elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
if field.type == descriptor.FieldDescriptor.TYPE_BYTES:
# Use base64 Data encoding for bytes
return base64.b64encode(value).decode('utf-8')
else:
return value
elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:
if value:
return True
else:
return False
elif field.cpp_type in _INT64_TYPES:
return str(value)
elif field.cpp_type in _FLOAT_TYPES:
if math.isinf(value):
if value < 0.0:
return '-Infinity'
else:
return 'Infinity'
if math.isnan(value):
return 'NaN'
return value
def _IsTimestampMessage(message_descriptor):
return (message_descriptor.name == 'Timestamp' and
message_descriptor.file.name == 'google/protobuf/timestamp.proto')
def _TimestampMessageToJsonObject(message):
"""Converts Timestamp message according to Proto3 JSON Specification."""
nanos = message.nanos % 1e9
dt = datetime.utcfromtimestamp(
message.seconds + (message.nanos - nanos) / 1e9)
result = dt.isoformat()
if (nanos % 1e9) == 0:
# If there are 0 fractional digits, the fractional
# point '.' should be omitted when serializing.
return result + 'Z'
if (nanos % 1e6) == 0:
# Serialize 3 fractional digits.
return result + '.%03dZ' % (nanos / 1e6)
if (nanos % 1e3) == 0:
# Serialize 6 fractional digits.
return result + '.%06dZ' % (nanos / 1e3)
# Serialize 9 fractional digits.
return result + '.%09dZ' % nanos
def _IsDurationMessage(message_descriptor):
return (message_descriptor.name == 'Duration' and
message_descriptor.file.name == 'google/protobuf/duration.proto')
def _DurationMessageToJsonObject(message):
"""Converts Duration message according to Proto3 JSON Specification."""
if message.seconds < 0 or message.nanos < 0:
result = '-'
seconds = - message.seconds + int((0 - message.nanos) / 1e9)
nanos = (0 - message.nanos) % 1e9
else:
result = ''
seconds = message.seconds + int(message.nanos / 1e9)
nanos = message.nanos % 1e9
result += '%d' % seconds
if (nanos % 1e9) == 0:
# If there are 0 fractional digits, the fractional
# point '.' should be omitted when serializing.
return result + 's'
if (nanos % 1e6) == 0:
# Serialize 3 fractional digits.
return result + '.%03ds' % (nanos / 1e6)
if (nanos % 1e3) == 0:
# Serialize 6 fractional digits.
return result + '.%06ds' % (nanos / 1e3)
# Serialize 9 fractional digits.
return result + '.%09ds' % nanos
def _IsFieldMaskMessage(message_descriptor):
return (message_descriptor.name == 'FieldMask' and
message_descriptor.file.name == 'google/protobuf/field_mask.proto')
def _FieldMaskMessageToJsonObject(message):
"""Converts FieldMask message according to Proto3 JSON Specification."""
result = ''
first = True
for path in message.paths:
if not first:
result += ','
result += path
first = False
return result
def _IsWrapperMessage(message_descriptor):
return message_descriptor.file.name == 'google/protobuf/wrappers.proto'
def _WrapperMessageToJsonObject(message):
return _ConvertFieldToJsonObject(
message.DESCRIPTOR.fields_by_name['value'], message.value)
def _DuplicateChecker(js):
result = {}
for name, value in js:
if name in result:
raise ParseError('Failed to load JSON: duplicate key ' + name)
result[name] = value
return result
def Parse(text, message):
"""Parses a JSON representation of a protocol message into a message.
Args:
text: Message JSON representation.
message: A protocol beffer message to merge into.
Returns:
The same message passed as argument.
Raises::
ParseError: On JSON parsing problems.
"""
if not isinstance(text, _UNICODETYPE): text = text.decode('utf-8')
try:
js = json.loads(text, object_pairs_hook=_DuplicateChecker)
except ValueError as e:
raise ParseError('Failed to load JSON: ' + str(e))
_ConvertFieldValuePair(js, message)
return message
def _ConvertFieldValuePair(js, message):
"""Convert field value pairs into regular message.
Args:
js: A JSON object to convert the field value pairs.
message: A regular protocol message to record the data.
Raises:
ParseError: In case of problems converting.
"""
names = []
message_descriptor = message.DESCRIPTOR
for name in js:
try:
field = message_descriptor.fields_by_camelcase_name.get(name, None)
if not field:
raise ParseError(
'Message type "{0}" has no field named "{1}".'.format(
message_descriptor.full_name, name))
if name in names:
raise ParseError(
'Message type "{0}" should not have multiple "{1}" fields.'.format(
message.DESCRIPTOR.full_name, name))
names.append(name)
# Check no other oneof field is parsed.
if field.containing_oneof is not None:
oneof_name = field.containing_oneof.name
if oneof_name in names:
raise ParseError('Message type "{0}" should not have multiple "{1}" '
'oneof fields.'.format(
message.DESCRIPTOR.full_name, oneof_name))
names.append(oneof_name)
value = js[name]
if value is None:
message.ClearField(field.name)
continue
# Parse field value.
if _IsMapEntry(field):
message.ClearField(field.name)
_ConvertMapFieldValue(value, message, field)
elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
message.ClearField(field.name)
if not isinstance(value, list):
raise ParseError('repeated field {0} must be in [] which is '
'{1}'.format(name, value))
for item in value:
if item is None:
continue
if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
sub_message = getattr(message, field.name).add()
_ConvertMessage(item, sub_message)
else:
getattr(message, field.name).append(
_ConvertScalarFieldValue(item, field))
elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
sub_message = getattr(message, field.name)
_ConvertMessage(value, sub_message)
else:
setattr(message, field.name, _ConvertScalarFieldValue(value, field))
except ParseError as e:
if field and field.containing_oneof is None:
raise ParseError('Failed to parse {0} field: {1}'.format(name, e))
else:
raise ParseError(str(e))
except ValueError as e:
raise ParseError('Failed to parse {0} field: {1}'.format(name, e))
except TypeError as e:
raise ParseError('Failed to parse {0} field: {1}'.format(name, e))
def _ConvertMessage(value, message):
"""Convert a JSON object into a message.
Args:
value: A JSON object.
message: A WKT or regular protocol message to record the data.
Raises:
ParseError: In case of convert problems.
"""
message_descriptor = message.DESCRIPTOR
if _IsTimestampMessage(message_descriptor):
_ConvertTimestampMessage(value, message)
elif _IsDurationMessage(message_descriptor):
_ConvertDurationMessage(value, message)
elif _IsFieldMaskMessage(message_descriptor):
_ConvertFieldMaskMessage(value, message)
elif _IsWrapperMessage(message_descriptor):
_ConvertWrapperMessage(value, message)
else:
_ConvertFieldValuePair(value, message)
def _ConvertTimestampMessage(value, message):
"""Convert a JSON representation into Timestamp message."""
timezone_offset = value.find('Z')
if timezone_offset == -1:
timezone_offset = value.find('+')
if timezone_offset == -1:
timezone_offset = value.rfind('-')
if timezone_offset == -1:
raise ParseError(
'Failed to parse timestamp: missing valid timezone offset.')
time_value = value[0:timezone_offset]
# Parse datetime and nanos
point_position = time_value.find('.')
if point_position == -1:
second_value = time_value
nano_value = ''
else:
second_value = time_value[:point_position]
nano_value = time_value[point_position + 1:]
date_object = datetime.strptime(second_value, _TIMESTAMPFOMAT)
seconds = (date_object - datetime(1970, 1, 1)).total_seconds()
if len(nano_value) > 9:
raise ParseError(
'Failed to parse Timestamp: nanos {0} more than '
'9 fractional digits.'.format(nano_value))
if nano_value:
nanos = round(float('0.' + nano_value) * 1e9)
else:
nanos = 0
# Parse timezone offsets
if value[timezone_offset] == 'Z':
if len(value) != timezone_offset + 1:
raise ParseError(
'Failed to parse timestamp: invalid trailing data {0}.'.format(value))
else:
timezone = value[timezone_offset:]
pos = timezone.find(':')
if pos == -1:
raise ParseError(
'Invalid timezone offset value: ' + timezone)
if timezone[0] == '+':
seconds += (int(timezone[1:pos])*60+int(timezone[pos+1:]))*60
else:
seconds -= (int(timezone[1:pos])*60+int(timezone[pos+1:]))*60
# Set seconds and nanos
message.seconds = int(seconds)
message.nanos = int(nanos)
def _ConvertDurationMessage(value, message):
"""Convert a JSON representation into Duration message."""
if value[-1] != 's':
raise ParseError(
'Duration must end with letter "s": ' + value)
try:
duration = float(value[:-1])
except ValueError:
raise ParseError(
'Couldn\'t parse duration: ' + value)
message.seconds = int(duration)
message.nanos = int(round((duration - message.seconds) * 1e9))
def _ConvertFieldMaskMessage(value, message):
"""Convert a JSON representation into FieldMask message."""
for path in value.split(','):
message.paths.append(path)
def _ConvertWrapperMessage(value, message):
"""Convert a JSON representation into Wrapper message."""
field = message.DESCRIPTOR.fields_by_name['value']
setattr(message, 'value', _ConvertScalarFieldValue(value, field))
def _ConvertMapFieldValue(value, message, field):
"""Convert map field value for a message map field.
Args:
value: A JSON object to convert the map field value.
message: A protocol message to record the converted data.
field: The descriptor of the map field to be converted.
Raises:
ParseError: In case of convert problems.
"""
if not isinstance(value, dict):
raise ParseError(
'Map fieled {0} must be in {} which is {1}.'.format(field.name, value))
key_field = field.message_type.fields_by_name['key']
value_field = field.message_type.fields_by_name['value']
for key in value:
key_value = _ConvertScalarFieldValue(key, key_field, True)
if value_field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
_ConvertMessage(value[key], getattr(message, field.name)[key_value])
else:
getattr(message, field.name)[key_value] = _ConvertScalarFieldValue(
value[key], value_field)
def _ConvertScalarFieldValue(value, field, require_quote=False):
"""Convert a single scalar field value.
Args:
value: A scalar value to convert the scalar field value.
field: The descriptor of the field to convert.
require_quote: If True, '"' is required for the field value.
Returns:
The converted scalar field value
Raises:
ParseError: In case of convert problems.
"""
if field.cpp_type in _INT_TYPES:
return _ConvertInteger(value)
elif field.cpp_type in _FLOAT_TYPES:
return _ConvertFloat(value)
elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:
return _ConvertBool(value, require_quote)
elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
if field.type == descriptor.FieldDescriptor.TYPE_BYTES:
return base64.b64decode(value)
else:
return value
elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
# Convert an enum value.
enum_value = field.enum_type.values_by_name.get(value, None)
if enum_value is None:
raise ParseError(
'Enum value must be a string literal with double quotes. '
'Type "{0}" has no value named {1}.'.format(
field.enum_type.full_name, value))
return enum_value.number
def _ConvertInteger(value):
"""Convert an integer.
Args:
value: A scalar value to convert.
Returns:
The integer value.
Raises:
ParseError: If an integer couldn't be consumed.
"""
if isinstance(value, float):
raise ParseError('Couldn\'t parse integer: {0}'.format(value))
if isinstance(value, _UNICODETYPE) and not _INTEGER.match(value):
raise ParseError('Couldn\'t parse integer: "{0}"'.format(value))
return int(value)
def _ConvertFloat(value):
"""Convert an floating point number."""
if value == 'nan':
raise ParseError('Couldn\'t parse float "nan", use "NaN" instead')
try:
# Assume Python compatible syntax.
return float(value)
except ValueError:
# Check alternative spellings.
if value == '-Infinity':
return float('-inf')
elif value == 'Infinity':
return float('inf')
elif value == 'NaN':
return float('nan')
else:
raise ParseError('Couldn\'t parse float: {0}'.format(value))
def _ConvertBool(value, require_quote):
"""Convert a boolean value.
Args:
value: A scalar value to convert.
require_quote: If True, '"' is required for the boolean value.
Returns:
The bool parsed.
Raises:
ParseError: If a boolean value couldn't be consumed.
"""
if require_quote:
if value == 'true':
return True
elif value == 'false':
return False
else:
raise ParseError('Expect "true" or "false", not {0}.'.format(value))
if not isinstance(value, bool):
raise ParseError('Expected true or false without quotes.')
return value