From f8c151f21e821371c55d1cd02b89ac1b8f50460f Mon Sep 17 00:00:00 2001 From: Jon Skeet Date: Fri, 3 Jul 2015 11:56:29 +0100 Subject: Initial implementation of JSON formatting - No parsing - Reflection based, so not hugely efficient - No line breaks or indentation --- .../ProtocolBuffers/Descriptors/EnumDescriptor.cs | 1 + .../ProtocolBuffers/FieldAccess/IFieldAccessor.cs | 2 + csharp/src/ProtocolBuffers/IMessage.cs | 6 +- csharp/src/ProtocolBuffers/JsonFormatter.cs | 521 +++++++++++++++++++++ csharp/src/ProtocolBuffers/ProtocolBuffers.csproj | 1 + 5 files changed, 528 insertions(+), 3 deletions(-) create mode 100644 csharp/src/ProtocolBuffers/JsonFormatter.cs (limited to 'csharp/src/ProtocolBuffers') diff --git a/csharp/src/ProtocolBuffers/Descriptors/EnumDescriptor.cs b/csharp/src/ProtocolBuffers/Descriptors/EnumDescriptor.cs index a6db5268..57860e61 100644 --- a/csharp/src/ProtocolBuffers/Descriptors/EnumDescriptor.cs +++ b/csharp/src/ProtocolBuffers/Descriptors/EnumDescriptor.cs @@ -89,6 +89,7 @@ namespace Google.Protobuf.Descriptors /// /// Finds an enum value by number. If multiple enum values have the /// same number, this returns the first defined value with that number. + /// If there is no value for the given number, this returns null. /// public EnumValueDescriptor FindValueByNumber(int number) { diff --git a/csharp/src/ProtocolBuffers/FieldAccess/IFieldAccessor.cs b/csharp/src/ProtocolBuffers/FieldAccess/IFieldAccessor.cs index 77e7146d..d1727cb4 100644 --- a/csharp/src/ProtocolBuffers/FieldAccess/IFieldAccessor.cs +++ b/csharp/src/ProtocolBuffers/FieldAccess/IFieldAccessor.cs @@ -44,6 +44,8 @@ namespace Google.Protobuf.FieldAccess /// FieldDescriptor Descriptor { get; } + // TODO: Should the argument type for these messages by IReflectedMessage? + /// /// Clears the field in the specified message. (For repeated fields, /// this clears the list.) diff --git a/csharp/src/ProtocolBuffers/IMessage.cs b/csharp/src/ProtocolBuffers/IMessage.cs index ad44668c..3324e9ae 100644 --- a/csharp/src/ProtocolBuffers/IMessage.cs +++ b/csharp/src/ProtocolBuffers/IMessage.cs @@ -40,9 +40,9 @@ namespace Google.Protobuf // TODO(jonskeet): Split these interfaces into separate files when we're happy with them. /// - /// Reflection support for a specific message type. + /// Reflection support for accessing field values. /// - public interface IReflectedMessage + public interface IReflectedMessage : IMessage { FieldAccessorTable Fields { get; } // TODO(jonskeet): Descriptor? Or a single property which has "all you need for reflection"? @@ -81,7 +81,7 @@ namespace Google.Protobuf /// the implementation class. /// /// The message type. - public interface IMessage : IMessage, IEquatable, IDeepCloneable, IFreezable where T : IMessage + public interface IMessage : IReflectedMessage, IEquatable, IDeepCloneable, IFreezable where T : IMessage { /// /// Merges the given message into this one. diff --git a/csharp/src/ProtocolBuffers/JsonFormatter.cs b/csharp/src/ProtocolBuffers/JsonFormatter.cs new file mode 100644 index 00000000..a6aa552f --- /dev/null +++ b/csharp/src/ProtocolBuffers/JsonFormatter.cs @@ -0,0 +1,521 @@ +#region Copyright notice and license +// Protocol Buffers - Google's data interchange format +// Copyright 2015 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#endregion + +using System; +using System.Collections; +using System.Globalization; +using System.Text; +using Google.Protobuf.Descriptors; +using Google.Protobuf.FieldAccess; + +namespace Google.Protobuf +{ + /// + /// Reflection-based converter from messages to JSON. + /// + /// + /// + /// Instances of this class are thread-safe, with no mutable state. + /// + /// + /// This is a simple start to get JSON formatting working. As it's reflection-based, + /// it's not as quick as baking calls into generated messages - but is a simpler implementation. + /// (This code is generally not heavily optimized.) + /// + /// + public sealed class JsonFormatter + { + private static JsonFormatter defaultInstance = new JsonFormatter(Settings.Default); + + /// + /// Returns a formatter using the default settings. + /// + public static JsonFormatter Default { get { return defaultInstance; } } + + /// + /// The JSON representation of the first 160 characters of Unicode. + /// Empty strings are replaced by the static constructor. + /// + private static readonly string[] CommonRepresentations = { + // C0 (ASCII and derivatives) control characters + "\\u0000", "\\u0001", "\\u0002", "\\u0003", // 0x00 + "\\u0004", "\\u0005", "\\u0006", "\\u0007", + "\\b", "\\t", "\\n", "\\u000b", + "\\f", "\\r", "\\u000e", "\\u000f", + "\\u0010", "\\u0011", "\\u0012", "\\u0013", // 0x10 + "\\u0014", "\\u0015", "\\u0016", "\\u0017", + "\\u0018", "\\u0019", "\\u001a", "\\u001b", + "\\u001c", "\\u001d", "\\u001e", "\\u001f", + // Escaping of " and \ are required by www.json.org string definition. + // Escaping of < and > are required for HTML security. + "", "", "\\\"", "", "", "", "", "", // 0x20 + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", // 0x30 + "", "", "", "", "\\u003c", "", "\\u003e", "", + "", "", "", "", "", "", "", "", // 0x40 + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", // 0x50 + "", "", "", "", "\\\\", "", "", "", + "", "", "", "", "", "", "", "", // 0x60 + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", // 0x70 + "", "", "", "", "", "", "", "\\u007f", + // C1 (ISO 8859 and Unicode) extended control characters + "\\u0080", "\\u0081", "\\u0082", "\\u0083", // 0x80 + "\\u0084", "\\u0085", "\\u0086", "\\u0087", + "\\u0088", "\\u0089", "\\u008a", "\\u008b", + "\\u008c", "\\u008d", "\\u008e", "\\u008f", + "\\u0090", "\\u0091", "\\u0092", "\\u0093", // 0x90 + "\\u0094", "\\u0095", "\\u0096", "\\u0097", + "\\u0098", "\\u0099", "\\u009a", "\\u009b", + "\\u009c", "\\u009d", "\\u009e", "\\u009f" + }; + + static JsonFormatter() + { + for (int i = 0; i < CommonRepresentations.Length; i++) + { + if (CommonRepresentations[i] == "") + { + CommonRepresentations[i] = ((char) i).ToString(); + } + } + } + + private readonly Settings settings; + + public JsonFormatter(Settings settings) + { + this.settings = settings; + } + + public string Format(IReflectedMessage message) + { + ThrowHelper.ThrowIfNull(message, "message"); + StringBuilder builder = new StringBuilder(); + WriteMessage(builder, message); + return builder.ToString(); + } + + private void WriteMessage(StringBuilder builder, IReflectedMessage message) + { + if (message == null) + { + WriteNull(builder); + return; + } + builder.Append("{ "); + var fields = message.Fields; + bool first = true; + foreach (var accessor in fields.Accessors) + { + object value = accessor.GetValue(message); + if (!settings.FormatDefaultValues && IsDefaultValue(accessor, value)) + { + continue; + } + if (!first) + { + builder.Append(", "); + } + WriteString(builder, ToCamelCase(accessor.Descriptor.Name)); + builder.Append(": "); + WriteValue(builder, accessor, value); + first = false; + } + builder.Append(first ? "}" : " }"); + } + + // Converted from src/google/protobuf/util/internal/utility.cc ToCamelCase + internal static string ToCamelCase(string input) + { + bool capitalizeNext = false; + bool wasCap = true; + bool isCap = false; + bool firstWord = true; + StringBuilder result = new StringBuilder(input.Length); + + for (int i = 0; i < input.Length; i++, wasCap = isCap) + { + isCap = char.IsUpper(input[i]); + if (input[i] == '_') + { + capitalizeNext = true; + if (result.Length != 0) + { + firstWord = false; + } + continue; + } + else if (firstWord) + { + // Consider when the current character B is capitalized, + // first word ends when: + // 1) following a lowercase: "...aB..." + // 2) followed by a lowercase: "...ABc..." + if (result.Length != 0 && isCap && + (!wasCap || (i + 1 < input.Length && char.IsLower(input[i + 1])))) + { + firstWord = false; + } + else + { + result.Append(char.ToLowerInvariant(input[i])); + continue; + } + } + else if (capitalizeNext) + { + capitalizeNext = false; + if (char.IsLower(input[i])) + { + result.Append(char.ToUpperInvariant(input[i])); + continue; + } + } + result.Append(input[i]); + } + return result.ToString(); + } + + private static void WriteNull(StringBuilder builder) + { + builder.Append("null"); + } + + private static bool IsDefaultValue(IFieldAccessor accessor, object value) + { + if (accessor.Descriptor.IsMap) + { + IDictionary dictionary = (IDictionary) value; + return dictionary.Count == 0; + } + if (accessor.Descriptor.IsRepeated) + { + IList list = (IList) value; + return list.Count == 0; + } + switch (accessor.Descriptor.FieldType) + { + case FieldType.Bool: + return (bool) value == false; + case FieldType.Bytes: + return (ByteString) value == ByteString.Empty; + case FieldType.String: + return (string) value == ""; + case FieldType.Double: + return (double) value == 0.0; + case FieldType.SInt32: + case FieldType.Int32: + case FieldType.SFixed32: + case FieldType.Enum: + return (int) value == 0; + case FieldType.Fixed32: + case FieldType.UInt32: + return (uint) value == 0; + case FieldType.Fixed64: + case FieldType.UInt64: + return (ulong) value == 0; + case FieldType.SFixed64: + case FieldType.Int64: + case FieldType.SInt64: + return (long) value == 0; + case FieldType.Float: + return (float) value == 0f; + case FieldType.Message: + case FieldType.Group: // Never expect to get this, but... + return value == null; + default: + throw new ArgumentException("Invalid field type"); + } + } + + private void WriteValue(StringBuilder builder, IFieldAccessor accessor, object value) + { + if (accessor.Descriptor.IsMap) + { + WriteDictionary(builder, accessor, (IDictionary) value); + } + else if (accessor.Descriptor.IsRepeated) + { + WriteList(builder, accessor, (IList) value); + } + else + { + WriteSingleValue(builder, accessor.Descriptor, value); + } + } + + private void WriteSingleValue(StringBuilder builder, FieldDescriptor descriptor, object value) + { + switch (descriptor.FieldType) + { + case FieldType.Bool: + builder.Append((bool) value ? "true" : "false"); + break; + case FieldType.Bytes: + // Nothing in Base64 needs escaping + builder.Append('"'); + builder.Append(((ByteString) value).ToBase64()); + builder.Append('"'); + break; + case FieldType.String: + WriteString(builder, (string) value); + break; + case FieldType.Fixed32: + case FieldType.UInt32: + case FieldType.SInt32: + case FieldType.Int32: + case FieldType.SFixed32: + { + IFormattable formattable = (IFormattable) value; + builder.Append(formattable.ToString("d", CultureInfo.InvariantCulture)); + break; + } + case FieldType.Enum: + EnumValueDescriptor enumValue = descriptor.EnumType.FindValueByNumber((int) value); + if (enumValue != null) + { + WriteString(builder, enumValue.Name); + } + else + { + // ??? Need more documentation + builder.Append(((int) value).ToString("d", CultureInfo.InvariantCulture)); + } + break; + case FieldType.Fixed64: + case FieldType.UInt64: + case FieldType.SFixed64: + case FieldType.Int64: + case FieldType.SInt64: + { + builder.Append('"'); + IFormattable formattable = (IFormattable) value; + builder.Append(formattable.ToString("d", CultureInfo.InvariantCulture)); + builder.Append('"'); + break; + } + case FieldType.Double: + case FieldType.Float: + string text = ((IFormattable) value).ToString("r", CultureInfo.InvariantCulture); + if (text == "NaN" || text == "Infinity" || text == "-Infinity") + { + builder.Append('"'); + builder.Append(text); + builder.Append('"'); + } + else + { + builder.Append(text); + } + break; + case FieldType.Message: + case FieldType.Group: // Never expect to get this, but... + WriteMessage(builder, (IReflectedMessage) value); + break; + default: + throw new ArgumentException("Invalid field type: " + descriptor.FieldType); + } + } + + private void WriteList(StringBuilder builder, IFieldAccessor accessor, IList list) + { + builder.Append("[ "); + bool first = true; + foreach (var value in list) + { + if (!first) + { + builder.Append(", "); + } + WriteSingleValue(builder, accessor.Descriptor, value); + first = false; + } + builder.Append(first ? "]" : " ]"); + } + + private void WriteDictionary(StringBuilder builder, IFieldAccessor accessor, IDictionary dictionary) + { + builder.Append("{ "); + bool first = true; + FieldDescriptor keyType = accessor.Descriptor.MessageType.FindFieldByNumber(1); + FieldDescriptor valueType = accessor.Descriptor.MessageType.FindFieldByNumber(2); + // This will box each pair. Could use IDictionaryEnumerator, but that's ugly in terms of disposal. + foreach (DictionaryEntry pair in dictionary) + { + if (!first) + { + builder.Append(", "); + } + string keyText; + switch (keyType.FieldType) + { + case FieldType.String: + keyText = (string) pair.Key; + break; + case FieldType.Bool: + keyText = (bool) pair.Key ? "true" : "false"; + break; + case FieldType.Fixed32: + case FieldType.Fixed64: + case FieldType.SFixed32: + case FieldType.SFixed64: + case FieldType.Int32: + case FieldType.Int64: + case FieldType.SInt32: + case FieldType.SInt64: + case FieldType.UInt32: + case FieldType.UInt64: + keyText = ((IFormattable) pair.Key).ToString("d", CultureInfo.InvariantCulture); + break; + default: + throw new ArgumentException("Invalid key type: " + keyType.FieldType); + } + WriteString(builder, keyText); + builder.Append(": "); + WriteSingleValue(builder, valueType, pair.Value); + first = false; + } + builder.Append(first ? "}" : " }"); + } + + /// + /// Writes a string (including leading and trailing double quotes) to a builder, escaping as required. + /// + /// + /// Other than surrogate pair handling, this code is mostly taken from src/google/protobuf/util/internal/json_escaping.cc. + /// + private void WriteString(StringBuilder builder, string text) + { + builder.Append('"'); + for (int i = 0; i < text.Length; i++) + { + char c = text[i]; + if (c < 0xa0) + { + builder.Append(CommonRepresentations[c]); + continue; + } + if (char.IsHighSurrogate(c)) + { + // Encountered first part of a surrogate pair. + // Check that we have the whole pair, and encode both parts as hex. + i++; + if (i == text.Length || !char.IsLowSurrogate(text[i])) + { + throw new ArgumentException("String contains low surrogate not followed by high surrogate"); + } + HexEncodeUtf16CodeUnit(builder, c); + HexEncodeUtf16CodeUnit(builder, text[i]); + continue; + } + else if (char.IsLowSurrogate(c)) + { + throw new ArgumentException("String contains high surrogate not preceded by low surrogate"); + } + switch ((uint) c) + { + // These are not required by json spec + // but used to prevent security bugs in javascript. + case 0xfeff: // Zero width no-break space + case 0xfff9: // Interlinear annotation anchor + case 0xfffa: // Interlinear annotation separator + case 0xfffb: // Interlinear annotation terminator + + case 0x00ad: // Soft-hyphen + case 0x06dd: // Arabic end of ayah + case 0x070f: // Syriac abbreviation mark + case 0x17b4: // Khmer vowel inherent Aq + case 0x17b5: // Khmer vowel inherent Aa + HexEncodeUtf16CodeUnit(builder, c); + break; + + default: + if ((c >= 0x0600 && c <= 0x0603) || // Arabic signs + (c >= 0x200b && c <= 0x200f) || // Zero width etc. + (c >= 0x2028 && c <= 0x202e) || // Separators etc. + (c >= 0x2060 && c <= 0x2064) || // Invisible etc. + (c >= 0x206a && c <= 0x206f)) + { + HexEncodeUtf16CodeUnit(builder, c); + } + else + { + // No handling of surrogates here - that's done earlier + builder.Append(c); + } + break; + } + } + builder.Append('"'); + } + + private const string Hex = "0123456789abcdef"; + private static void HexEncodeUtf16CodeUnit(StringBuilder builder, char c) + { + uint utf16 = c; + builder.Append("\\u"); + builder.Append(Hex[(c >> 12) & 0xf]); + builder.Append(Hex[(c >> 8) & 0xf]); + builder.Append(Hex[(c >> 4) & 0xf]); + builder.Append(Hex[(c >> 0) & 0xf]); + } + + /// + /// Settings controlling JSON formatting. + /// + public sealed class Settings + { + private static readonly Settings defaultInstance = new Settings(false); + + /// + /// Default settings, as used by + /// + public static Settings Default { get { return defaultInstance; } } + + private readonly bool formatDefaultValues; + + + /// + /// Whether fields whose values are the default for the field type (e.g. 0 for integers) + /// should be formatted (true) or omitted (false). + /// + public bool FormatDefaultValues { get { return formatDefaultValues; } } + + public Settings(bool formatDefaultValues) + { + this.formatDefaultValues = formatDefaultValues; + } + } + } +} diff --git a/csharp/src/ProtocolBuffers/ProtocolBuffers.csproj b/csharp/src/ProtocolBuffers/ProtocolBuffers.csproj index aa4adcc0..17532de8 100644 --- a/csharp/src/ProtocolBuffers/ProtocolBuffers.csproj +++ b/csharp/src/ProtocolBuffers/ProtocolBuffers.csproj @@ -81,6 +81,7 @@ + -- cgit v1.2.3