diff options
Diffstat (limited to 'python/google/protobuf/json_format.py')
-rw-r--r-- | python/google/protobuf/json_format.py | 601 |
1 files changed, 601 insertions, 0 deletions
diff --git a/python/google/protobuf/json_format.py b/python/google/protobuf/json_format.py new file mode 100644 index 00000000..09110e04 --- /dev/null +++ b/python/google/protobuf/json_format.py @@ -0,0 +1,601 @@ +# Protocol Buffers - Google's data interchange format +# Copyright 2008 Google Inc. All rights reserved. +# https://developers.google.com/protocol-buffers/ +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
"""Contains routines for printing protocol messages in JSON format."""

__author__ = 'jieluo@google.com (Jie Luo)'

import base64
from datetime import datetime
import json
import math
import re

from google.protobuf import descriptor

_TIMESTAMPFOMAT = '%Y-%m-%dT%H:%M:%S'
_NUMBER = re.compile(u'[0-9+-][0-9e.+-]*')
_INTEGER = re.compile(u'[0-9+-]')
_INT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_INT32,
                        descriptor.FieldDescriptor.CPPTYPE_UINT32,
                        descriptor.FieldDescriptor.CPPTYPE_INT64,
                        descriptor.FieldDescriptor.CPPTYPE_UINT64])
_INT64_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_INT64,
                          descriptor.FieldDescriptor.CPPTYPE_UINT64])
_FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT,
                          descriptor.FieldDescriptor.CPPTYPE_DOUBLE])
# On Python 2 `str` is the byte string type and text is `unicode`; on
# Python 3 `str` is already the text type.
if str is bytes:
  _UNICODETYPE = unicode
else:
  _UNICODETYPE = str


class SerializeToJsonError(Exception):
  """Thrown if serialization to JSON fails."""


class ParseError(Exception):
  """Thrown in case of parsing error."""


def _IsString(value):
  """Returns True if value is a text or byte string."""
  return isinstance(value, (str, _UNICODETYPE))


def MessageToJson(message, including_default_value_fields=False):
  """Converts protobuf message to JSON format.

  Args:
    message: The protocol buffers message instance to serialize.
    including_default_value_fields: If True, singular primitive fields,
        repeated fields, and map fields will always be serialized.  If
        False, only serialize non-empty fields.  Singular message fields
        and oneof fields are not affected by this option.

  Returns:
    A string containing the JSON formatted protocol buffer message.
  """
  js = _MessageToJsonObject(message, including_default_value_fields)
  return json.dumps(js, indent=2)


def _MessageToJsonObject(message, including_default_value_fields):
  """Converts message to an object according to Proto3 JSON Specification."""
  message_descriptor = message.DESCRIPTOR
  # Well-known types have dedicated JSON representations; everything else
  # is serialized field by field.
  if _IsTimestampMessage(message_descriptor):
    return _TimestampMessageToJsonObject(message)
  if _IsDurationMessage(message_descriptor):
    return _DurationMessageToJsonObject(message)
  if _IsFieldMaskMessage(message_descriptor):
    return _FieldMaskMessageToJsonObject(message)
  if _IsWrapperMessage(message_descriptor):
    return _WrapperMessageToJsonObject(message)
  return _RegularMessageToJsonObject(message, including_default_value_fields)


def _IsMapEntry(field):
  """Returns a truthy value if field is a message field flagged map_entry."""
  return (field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and
          field.message_type.has_options and
          field.message_type.GetOptions().map_entry)


def _RegularMessageToJsonObject(message, including_default_value_fields):
  """Converts normal message according to Proto3 JSON Specification.

  Args:
    message: The message to serialize.
    including_default_value_fields: If True, unset singular primitive,
        repeated and map fields are emitted with their default values.

  Returns:
    A dict keyed by the camelCase field names.

  Raises:
    SerializeToJsonError: If converting a field raises ValueError, or an
        enum field holds a number with no matching enum value.
  """
  js = {}
  fields = message.ListFields()

  try:
    for field, value in fields:
      name = field.camelcase_name
      if _IsMapEntry(field):
        # Convert a map field.
        value_field = field.message_type.fields_by_name['value']
        js_map = {}
        for key in value:
          js_map[key] = _ConvertFieldToJsonObject(
              value_field, value[key], including_default_value_fields)
        js[name] = js_map
      elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
        # Convert a repeated field.
        js[name] = [
            _ConvertFieldToJsonObject(
                field, element, including_default_value_fields)
            for element in value]
      else:
        js[name] = _ConvertFieldToJsonObject(
            field, value, including_default_value_fields)

    # Serialize default value if including_default_value_fields is True.
    if including_default_value_fields:
      message_descriptor = message.DESCRIPTOR
      for field in message_descriptor.fields:
        # Singular message fields and oneof fields will not be affected.
        if ((field.label != descriptor.FieldDescriptor.LABEL_REPEATED and
             field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE) or
            field.containing_oneof):
          continue
        name = field.camelcase_name
        if name in js:
          # Skip the field which has been serialized already.
          continue
        if _IsMapEntry(field):
          js[name] = {}
        elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
          js[name] = []
        else:
          js[name] = _ConvertFieldToJsonObject(field, field.default_value)

  except ValueError as e:
    # `field` is the loop variable of whichever loop was running.
    raise SerializeToJsonError(
        'Failed to serialize {0} field: {1}'.format(field.name, e))

  return js


def _ConvertFieldToJsonObject(
    field, value, including_default_value_fields=False):
  """Converts field value according to Proto3 JSON Specification.

  Args:
    field: The descriptor of the field being serialized.
    value: A single (non-repeated) value of that field.
    including_default_value_fields: Forwarded when value is a sub-message.

  Returns:
    A JSON-serializable Python object for the value.

  Raises:
    SerializeToJsonError: If an enum number has no matching enum value.
  """
  if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
    return _MessageToJsonObject(value, including_default_value_fields)
  elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
    enum_value = field.enum_type.values_by_number.get(value, None)
    if enum_value is not None:
      return enum_value.name
    else:
      raise SerializeToJsonError('Enum field contains an integer value '
                                 'which can not mapped to an enum value.')
  elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
    if field.type == descriptor.FieldDescriptor.TYPE_BYTES:
      # Use base64 Data encoding for bytes
      return base64.b64encode(value).decode('utf-8')
    else:
      return value
  elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:
    return bool(value)
  elif field.cpp_type in _INT64_TYPES:
    # 64-bit integers are emitted as decimal strings.
    return str(value)
  elif field.cpp_type in _FLOAT_TYPES:
    if math.isinf(value):
      if value < 0.0:
        return '-Infinity'
      else:
        return 'Infinity'
    if math.isnan(value):
      return 'NaN'
  return value


def _IsTimestampMessage(message_descriptor):
  """Returns True for google.protobuf.Timestamp descriptors."""
  return (message_descriptor.name == 'Timestamp' and
          message_descriptor.file.name == 'google/protobuf/timestamp.proto')


def _TimestampMessageToJsonObject(message):
  """Converts Timestamp message according to Proto3 JSON Specification.

  Returns:
    A string of the form '<date>T<time>[.fraction]Z' with 0, 3, 6 or 9
    fractional digits.
  """
  # Normalize nanos into [0, 1e9) and carry the remainder into seconds.
  nanos = message.nanos % 1e9
  dt = datetime.utcfromtimestamp(
      message.seconds + (message.nanos - nanos) / 1e9)
  result = dt.isoformat()
  if (nanos % 1e9) == 0:
    # If there are 0 fractional digits, the fractional
    # point '.' should be omitted when serializing.
    return result + 'Z'
  if (nanos % 1e6) == 0:
    # Serialize 3 fractional digits.
    return result + '.%03dZ' % (nanos / 1e6)
  if (nanos % 1e3) == 0:
    # Serialize 6 fractional digits.
    return result + '.%06dZ' % (nanos / 1e3)
  # Serialize 9 fractional digits.
  return result + '.%09dZ' % nanos


def _IsDurationMessage(message_descriptor):
  """Returns True for google.protobuf.Duration descriptors."""
  return (message_descriptor.name == 'Duration' and
          message_descriptor.file.name == 'google/protobuf/duration.proto')


def _DurationMessageToJsonObject(message):
  """Converts Duration message according to Proto3 JSON Specification.

  Returns:
    A string of the form '[-]<seconds>[.fraction]s' with 0, 3, 6 or 9
    fractional digits.
  """
  if message.seconds < 0 or message.nanos < 0:
    result = '-'
    seconds = - message.seconds + int((0 - message.nanos) / 1e9)
    nanos = (0 - message.nanos) % 1e9
  else:
    result = ''
    seconds = message.seconds + int(message.nanos / 1e9)
    nanos = message.nanos % 1e9
  result += '%d' % seconds
  if (nanos % 1e9) == 0:
    # If there are 0 fractional digits, the fractional
    # point '.' should be omitted when serializing.
    return result + 's'
  if (nanos % 1e6) == 0:
    # Serialize 3 fractional digits.
    return result + '.%03ds' % (nanos / 1e6)
  if (nanos % 1e3) == 0:
    # Serialize 6 fractional digits.
    return result + '.%06ds' % (nanos / 1e3)
  # Serialize 9 fractional digits.
  return result + '.%09ds' % nanos


def _IsFieldMaskMessage(message_descriptor):
  """Returns True for google.protobuf.FieldMask descriptors."""
  return (message_descriptor.name == 'FieldMask' and
          message_descriptor.file.name == 'google/protobuf/field_mask.proto')


def _FieldMaskMessageToJsonObject(message):
  """Converts FieldMask message according to Proto3 JSON Specification.

  Returns:
    The paths of the mask joined with ','.
  """
  return ','.join(message.paths)


def _IsWrapperMessage(message_descriptor):
  """Returns True for messages declared in google/protobuf/wrappers.proto."""
  return message_descriptor.file.name == 'google/protobuf/wrappers.proto'


def _WrapperMessageToJsonObject(message):
  """Converts a wrapper message to the JSON form of its 'value' field."""
  return _ConvertFieldToJsonObject(
      message.DESCRIPTOR.fields_by_name['value'], message.value)


def _DuplicateChecker(js):
  """Builds a dict from (name, value) pairs, rejecting repeated names.

  Used as the `object_pairs_hook` of `json.loads`.

  Raises:
    ParseError: If the same key appears more than once.
  """
  result = {}
  for name, value in js:
    if name in result:
      raise ParseError('Failed to load JSON: duplicate key ' + name)
    result[name] = value
  return result


def Parse(text, message):
  """Parses a JSON representation of a protocol message into a message.

  Args:
    text: Message JSON representation.
    message: A protocol buffer message to merge into.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On JSON parsing problems.
  """
  if not isinstance(text, _UNICODETYPE): text = text.decode('utf-8')
  try:
    js = json.loads(text, object_pairs_hook=_DuplicateChecker)
  except ValueError as e:
    raise ParseError('Failed to load JSON: ' + str(e))
  _ConvertFieldValuePair(js, message)
  return message


def _ConvertFieldValuePair(js, message):
  """Convert field value pairs into regular message.

  Args:
    js: A JSON object to convert the field value pairs.
    message: A regular protocol message to record the data.

  Raises:
    ParseError: In case of problems converting.
  """
  # JSON names of the fields seen so far, plus the names of the oneofs they
  # belong to, so that duplicates and conflicting oneof members are caught.
  names = set()
  message_descriptor = message.DESCRIPTOR
  for name in js:
    try:
      field = message_descriptor.fields_by_camelcase_name.get(name, None)
      if not field:
        raise ParseError(
            'Message type "{0}" has no field named "{1}".'.format(
                message_descriptor.full_name, name))
      if name in names:
        raise ParseError(
            'Message type "{0}" should not have multiple "{1}" fields.'.format(
                message.DESCRIPTOR.full_name, name))
      names.add(name)
      # Check no other oneof field is parsed.
      if field.containing_oneof is not None:
        oneof_name = field.containing_oneof.name
        if oneof_name in names:
          raise ParseError('Message type "{0}" should not have multiple "{1}" '
                           'oneof fields.'.format(
                               message.DESCRIPTOR.full_name, oneof_name))
        names.add(oneof_name)

      value = js[name]
      if value is None:
        # JSON null resets the field.
        message.ClearField(field.name)
        continue

      # Parse field value.
      if _IsMapEntry(field):
        message.ClearField(field.name)
        _ConvertMapFieldValue(value, message, field)
      elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
        message.ClearField(field.name)
        if not isinstance(value, list):
          raise ParseError('repeated field {0} must be in [] which is '
                           '{1}'.format(name, value))
        for item in value:
          if item is None:
            continue
          if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
            sub_message = getattr(message, field.name).add()
            _ConvertMessage(item, sub_message)
          else:
            getattr(message, field.name).append(
                _ConvertScalarFieldValue(item, field))
      elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
        sub_message = getattr(message, field.name)
        _ConvertMessage(value, sub_message)
      else:
        setattr(message, field.name, _ConvertScalarFieldValue(value, field))
    except ParseError as e:
      # Prefix the field name, except for unknown fields and oneof members
      # whose message is re-raised as is.
      if field and field.containing_oneof is None:
        raise ParseError('Failed to parse {0} field: {1}'.format(name, e))
      else:
        raise ParseError(str(e))
    except (ValueError, TypeError) as e:
      raise ParseError('Failed to parse {0} field: {1}'.format(name, e))


def _ConvertMessage(value, message):
  """Convert a JSON object into a message.

  Args:
    value: A JSON object.
    message: A WKT or regular protocol message to record the data.

  Raises:
    ParseError: In case of convert problems.
  """
  message_descriptor = message.DESCRIPTOR
  if _IsTimestampMessage(message_descriptor):
    _ConvertTimestampMessage(value, message)
  elif _IsDurationMessage(message_descriptor):
    _ConvertDurationMessage(value, message)
  elif _IsFieldMaskMessage(message_descriptor):
    _ConvertFieldMaskMessage(value, message)
  elif _IsWrapperMessage(message_descriptor):
    _ConvertWrapperMessage(value, message)
  else:
    _ConvertFieldValuePair(value, message)


def _ConvertTimestampMessage(value, message):
  """Convert a JSON representation into Timestamp message.

  Args:
    value: The timestamp string, ending in 'Z' or a '+hh:mm' / '-hh:mm'
        offset, with at most 9 fractional digits.
    message: The Timestamp message whose seconds and nanos are set.

  Raises:
    ParseError: If value is not a string or is malformed.
    ValueError: If the date/time or offset digits cannot be parsed.
  """
  if not _IsString(value):
    # Without this check a JSON number would fail with AttributeError,
    # which callers do not translate into ParseError.
    raise ParseError(
        'Failed to parse timestamp: expected a string, got {0!r}.'.format(
            value))
  timezone_offset = value.find('Z')
  if timezone_offset == -1:
    timezone_offset = value.find('+')
  if timezone_offset == -1:
    timezone_offset = value.rfind('-')
  if timezone_offset == -1:
    raise ParseError(
        'Failed to parse timestamp: missing valid timezone offset.')
  time_value = value[0:timezone_offset]
  # Parse datetime and nanos
  point_position = time_value.find('.')
  if point_position == -1:
    second_value = time_value
    nano_value = ''
  else:
    second_value = time_value[:point_position]
    nano_value = time_value[point_position + 1:]
  date_object = datetime.strptime(second_value, _TIMESTAMPFOMAT)
  seconds = (date_object - datetime(1970, 1, 1)).total_seconds()
  if len(nano_value) > 9:
    raise ParseError(
        'Failed to parse Timestamp: nanos {0} more than '
        '9 fractional digits.'.format(nano_value))
  if nano_value:
    nanos = round(float('0.' + nano_value) * 1e9)
  else:
    nanos = 0
  # Parse timezone offsets
  if value[timezone_offset] == 'Z':
    if len(value) != timezone_offset + 1:
      raise ParseError(
          'Failed to parse timestamp: invalid trailing data {0}.'.format(value))
  else:
    timezone = value[timezone_offset:]
    pos = timezone.find(':')
    if pos == -1:
      raise ParseError(
          'Invalid timezone offset value: ' + timezone)
    offset_seconds = (int(timezone[1:pos]) * 60 + int(timezone[pos + 1:])) * 60
    if timezone[0] == '+':
      seconds += offset_seconds
    else:
      seconds -= offset_seconds
  # Set seconds and nanos
  message.seconds = int(seconds)
  message.nanos = int(nanos)


def _ConvertDurationMessage(value, message):
  """Convert a JSON representation into Duration message.

  Args:
    value: The duration string: a decimal number followed by 's'.
    message: The Duration message whose seconds and nanos are set.

  Raises:
    ParseError: If value is not a string, does not end with 's', or the
        numeric part cannot be parsed.
  """
  if not _IsString(value):
    raise ParseError(
        'Duration must be a string ending with letter "s": {0!r}'.format(
            value))
  # Slicing instead of indexing so that an empty string is reported as a
  # ParseError rather than escaping as IndexError.
  if value[-1:] != 's':
    raise ParseError(
        'Duration must end with letter "s": ' + value)
  try:
    # NOTE: parsing through float limits precision for very large durations.
    duration = float(value[:-1])
  except ValueError:
    raise ParseError(
        'Couldn\'t parse duration: ' + value)
  message.seconds = int(duration)
  message.nanos = int(round((duration - message.seconds) * 1e9))


def _ConvertFieldMaskMessage(value, message):
  """Convert a JSON representation into FieldMask message.

  Args:
    value: Comma separated paths; the empty string means no paths.
    message: The FieldMask message the paths are appended to.

  Raises:
    ParseError: If value is not a string.
  """
  if not _IsString(value):
    raise ParseError(
        'FieldMask must be a string of comma separated paths: {0!r}'.format(
            value))
  # ''.split(',') yields [''], which would add a bogus empty path and break
  # the round trip of an empty FieldMask.
  if value:
    for path in value.split(','):
      message.paths.append(path)


def _ConvertWrapperMessage(value, message):
  """Convert a JSON representation into Wrapper message."""
  field = message.DESCRIPTOR.fields_by_name['value']
  setattr(message, 'value', _ConvertScalarFieldValue(value, field))


def _ConvertMapFieldValue(value, message, field):
  """Convert map field value for a message map field.

  Args:
    value: A JSON object to convert the map field value.
    message: A protocol message to record the converted data.
    field: The descriptor of the map field to be converted.

  Raises:
    ParseError: In case of convert problems.
  """
  if not isinstance(value, dict):
    # The literal braces must be escaped: mixing '{}' with '{0}' makes
    # str.format raise ValueError instead of producing this message.
    raise ParseError(
        'Map field {0} must be in {{}} which is {1}.'.format(
            field.name, value))
  key_field = field.message_type.fields_by_name['key']
  value_field = field.message_type.fields_by_name['value']
  for key in value:
    # JSON object keys are always strings, hence require_quote=True.
    key_value = _ConvertScalarFieldValue(key, key_field, True)
    if value_field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
      _ConvertMessage(value[key], getattr(message, field.name)[key_value])
    else:
      getattr(message, field.name)[key_value] = _ConvertScalarFieldValue(
          value[key], value_field)


def _ConvertScalarFieldValue(value, field, require_quote=False):
  """Convert a single scalar field value.

  Args:
    value: A scalar value to convert the scalar field value.
    field: The descriptor of the field to convert.
    require_quote: If True, '"' is required for the field value.

  Returns:
    The converted scalar field value

  Raises:
    ParseError: In case of convert problems.
  """
  if field.cpp_type in _INT_TYPES:
    return _ConvertInteger(value)
  elif field.cpp_type in _FLOAT_TYPES:
    return _ConvertFloat(value)
  elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:
    return _ConvertBool(value, require_quote)
  elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
    if field.type == descriptor.FieldDescriptor.TYPE_BYTES:
      return base64.b64decode(value)
    else:
      return value
  elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
    # Convert an enum value.
    enum_value = field.enum_type.values_by_name.get(value, None)
    if enum_value is None:
      raise ParseError(
          'Enum value must be a string literal with double quotes. '
          'Type "{0}" has no value named {1}.'.format(
              field.enum_type.full_name, value))
    return enum_value.number


def _ConvertInteger(value):
  """Convert an integer.

  Args:
    value: A scalar value to convert.

  Returns:
    The integer value.

  Raises:
    ParseError: If an integer couldn't be consumed.
  """
  if isinstance(value, float):
    raise ParseError('Couldn\'t parse integer: {0}'.format(value))

  # Only the first character is checked here; int() below rejects the rest
  # with ValueError.
  if isinstance(value, _UNICODETYPE) and not _INTEGER.match(value):
    raise ParseError('Couldn\'t parse integer: "{0}"'.format(value))

  return int(value)


def _ConvertFloat(value):
  """Convert a floating point number.

  Args:
    value: A JSON number, or a string holding a number or one of
        'NaN', 'Infinity', '-Infinity'.

  Returns:
    The float value.

  Raises:
    ParseError: If a float couldn't be consumed.
  """
  if value == 'nan':
    raise ParseError('Couldn\'t parse float "nan", use "NaN" instead')
  try:
    # Assume Python compatible syntax.
    return float(value)
  except ValueError:
    # Check alternative spellings.
    if value == '-Infinity':
      return float('-inf')
    elif value == 'Infinity':
      return float('inf')
    elif value == 'NaN':
      return float('nan')
    else:
      raise ParseError('Couldn\'t parse float: {0}'.format(value))


def _ConvertBool(value, require_quote):
  """Convert a boolean value.

  Args:
    value: A scalar value to convert.
    require_quote: If True, '"' is required for the boolean value.

  Returns:
    The bool parsed.

  Raises:
    ParseError: If a boolean value couldn't be consumed.
  """
  if require_quote:
    if value == 'true':
      return True
    elif value == 'false':
      return False
    else:
      raise ParseError('Expect "true" or "false", not {0}.'.format(value))

  if not isinstance(value, bool):
    raise ParseError('Expected true or false without quotes.')
  return value