diff options
Diffstat (limited to 'csharp/src/Google.Protobuf/JsonParser.cs')
-rw-r--r-- | csharp/src/Google.Protobuf/JsonParser.cs | 1019 |
1 files changed, 1019 insertions, 0 deletions
diff --git a/csharp/src/Google.Protobuf/JsonParser.cs b/csharp/src/Google.Protobuf/JsonParser.cs new file mode 100644 index 00000000..6b6f2d9a --- /dev/null +++ b/csharp/src/Google.Protobuf/JsonParser.cs @@ -0,0 +1,1019 @@ +#region Copyright notice and license +// Protocol Buffers - Google's data interchange format +// Copyright 2015 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#endregion + +using Google.Protobuf.Reflection; +using Google.Protobuf.WellKnownTypes; +using System; +using System.Collections; +using System.Collections.Generic; +using System.Globalization; +using System.IO; +using System.Text; +using System.Text.RegularExpressions; + +namespace Google.Protobuf +{ + /// <summary> + /// Reflection-based converter from JSON to messages. + /// </summary> + /// <remarks> + /// <para> + /// Instances of this class are thread-safe, with no mutable state. + /// </para> + /// <para> + /// This is a simple start to get JSON parsing working. As it's reflection-based, + /// it's not as quick as baking calls into generated messages - but is a simpler implementation. + /// (This code is generally not heavily optimized.) + /// </para> + /// </remarks> + public sealed class JsonParser + { + // Note: using 0-9 instead of \d to ensure no non-ASCII digits. + // This regex isn't a complete validator, but will remove *most* invalid input. We rely on parsing to do the rest. + private static readonly Regex TimestampRegex = new Regex(@"^(?<datetime>[0-9]{4}-[01][0-9]-[0-3][0-9]T[012][0-9]:[0-5][0-9]:[0-5][0-9])(?<subseconds>\.[0-9]{1,9})?(?<offset>(Z|[+-][0-1][0-9]:[0-5][0-9]))$", FrameworkPortability.CompiledRegexWhereAvailable); + private static readonly Regex DurationRegex = new Regex(@"^(?<sign>-)?(?<int>[0-9]{1,12})(?<subseconds>\.[0-9]{1,9})?s$", FrameworkPortability.CompiledRegexWhereAvailable); + private static readonly int[] SubsecondScalingFactors = { 0, 100000000, 100000000, 10000000, 1000000, 100000, 10000, 1000, 100, 10, 1 }; + private static readonly char[] FieldMaskPathSeparators = new[] { ',' }; + + private static readonly JsonParser defaultInstance = new JsonParser(Settings.Default); + + // TODO: Consider introducing a class containing parse state of the parser, tokenizer and depth. That would simplify these handlers + // and the signatures of various methods. + private static readonly Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>> + WellKnownTypeHandlers = new Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>> + { + { Timestamp.Descriptor.FullName, (parser, message, tokenizer) => MergeTimestamp(message, tokenizer.Next()) }, + { Duration.Descriptor.FullName, (parser, message, tokenizer) => MergeDuration(message, tokenizer.Next()) }, + { Value.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeStructValue(message, tokenizer) }, + { ListValue.Descriptor.FullName, (parser, message, tokenizer) => + parser.MergeRepeatedField(message, message.Descriptor.Fields[ListValue.ValuesFieldNumber], tokenizer) }, + { Struct.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeStruct(message, tokenizer) }, + { Any.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeAny(message, tokenizer) }, + { FieldMask.Descriptor.FullName, (parser, message, tokenizer) => MergeFieldMask(message, tokenizer.Next()) }, + { Int32Value.Descriptor.FullName, MergeWrapperField }, + { Int64Value.Descriptor.FullName, MergeWrapperField }, + { UInt32Value.Descriptor.FullName, MergeWrapperField }, + { UInt64Value.Descriptor.FullName, MergeWrapperField }, + { FloatValue.Descriptor.FullName, MergeWrapperField }, + { DoubleValue.Descriptor.FullName, MergeWrapperField }, + { BytesValue.Descriptor.FullName, MergeWrapperField }, + { StringValue.Descriptor.FullName, MergeWrapperField }, + { BoolValue.Descriptor.FullName, MergeWrapperField } + }; + + // Convenience method to avoid having to repeat the same code multiple times in the above + // dictionary initialization. + private static void MergeWrapperField(JsonParser parser, IMessage message, JsonTokenizer tokenizer) + { + parser.MergeField(message, message.Descriptor.Fields[WrappersReflection.WrapperValueFieldNumber], tokenizer); + } + + /// <summary> + /// Returns a formatter using the default settings. + /// </summary> + public static JsonParser Default { get { return defaultInstance; } } + + private readonly Settings settings; + + /// <summary> + /// Creates a new formatted with the given settings. + /// </summary> + /// <param name="settings">The settings.</param> + public JsonParser(Settings settings) + { + this.settings = settings; + } + + /// <summary> + /// Parses <paramref name="json"/> and merges the information into the given message. + /// </summary> + /// <param name="message">The message to merge the JSON information into.</param> + /// <param name="json">The JSON to parse.</param> + internal void Merge(IMessage message, string json) + { + Merge(message, new StringReader(json)); + } + + /// <summary> + /// Parses JSON read from <paramref name="jsonReader"/> and merges the information into the given message. + /// </summary> + /// <param name="message">The message to merge the JSON information into.</param> + /// <param name="jsonReader">Reader providing the JSON to parse.</param> + internal void Merge(IMessage message, TextReader jsonReader) + { + var tokenizer = JsonTokenizer.FromTextReader(jsonReader); + Merge(message, tokenizer); + var lastToken = tokenizer.Next(); + if (lastToken != JsonToken.EndDocument) + { + throw new InvalidProtocolBufferException("Expected end of JSON after object"); + } + } + + /// <summary> + /// Merges the given message using data from the given tokenizer. In most cases, the next + /// token should be a "start object" token, but wrapper types and nullity can invalidate + /// that assumption. This is implemented as an LL(1) recursive descent parser over the stream + /// of tokens provided by the tokenizer. This token stream is assumed to be valid JSON, with the + /// tokenizer performing that validation - but not every token stream is valid "protobuf JSON". + /// </summary> + private void Merge(IMessage message, JsonTokenizer tokenizer) + { + if (tokenizer.ObjectDepth > settings.RecursionLimit) + { + throw InvalidProtocolBufferException.JsonRecursionLimitExceeded(); + } + if (message.Descriptor.IsWellKnownType) + { + Action<JsonParser, IMessage, JsonTokenizer> handler; + if (WellKnownTypeHandlers.TryGetValue(message.Descriptor.FullName, out handler)) + { + handler(this, message, tokenizer); + return; + } + // Well-known types with no special handling continue in the normal way. + } + var token = tokenizer.Next(); + if (token.Type != JsonToken.TokenType.StartObject) + { + throw new InvalidProtocolBufferException("Expected an object"); + } + var descriptor = message.Descriptor; + var jsonFieldMap = descriptor.Fields.ByJsonName(); + // All the oneof fields we've already accounted for - we can only see each of them once. + // The set is created lazily to avoid the overhead of creating a set for every message + // we parsed, when oneofs are relatively rare. + HashSet<OneofDescriptor> seenOneofs = null; + while (true) + { + token = tokenizer.Next(); + if (token.Type == JsonToken.TokenType.EndObject) + { + return; + } + if (token.Type != JsonToken.TokenType.Name) + { + throw new InvalidOperationException("Unexpected token type " + token.Type); + } + string name = token.StringValue; + FieldDescriptor field; + if (jsonFieldMap.TryGetValue(name, out field)) + { + if (field.ContainingOneof != null) + { + if (seenOneofs == null) + { + seenOneofs = new HashSet<OneofDescriptor>(); + } + if (!seenOneofs.Add(field.ContainingOneof)) + { + throw new InvalidProtocolBufferException($"Multiple values specified for oneof {field.ContainingOneof.Name}"); + } + } + MergeField(message, field, tokenizer); + } + else + { + // TODO: Is this what we want to do? If not, we'll need to skip the value, + // which may be an object or array. (We might want to put code in the tokenizer + // to do that.) + throw new InvalidProtocolBufferException("Unknown field: " + name); + } + } + } + + private void MergeField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer) + { + var token = tokenizer.Next(); + if (token.Type == JsonToken.TokenType.Null) + { + // Clear the field if we see a null token, unless it's for a singular field of type + // google.protobuf.Value. + // Note: different from Java API, which just ignores it. + // TODO: Bring it more in line? Discuss... + if (field.IsMap || field.IsRepeated || !IsGoogleProtobufValueField(field)) + { + field.Accessor.Clear(message); + return; + } + } + tokenizer.PushBack(token); + + if (field.IsMap) + { + MergeMapField(message, field, tokenizer); + } + else if (field.IsRepeated) + { + MergeRepeatedField(message, field, tokenizer); + } + else + { + var value = ParseSingleValue(field, tokenizer); + field.Accessor.SetValue(message, value); + } + } + + private void MergeRepeatedField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer) + { + var token = tokenizer.Next(); + if (token.Type != JsonToken.TokenType.StartArray) + { + throw new InvalidProtocolBufferException("Repeated field value was not an array. Token type: " + token.Type); + } + + IList list = (IList) field.Accessor.GetValue(message); + while (true) + { + token = tokenizer.Next(); + if (token.Type == JsonToken.TokenType.EndArray) + { + return; + } + tokenizer.PushBack(token); + if (token.Type == JsonToken.TokenType.Null) + { + throw new InvalidProtocolBufferException("Repeated field elements cannot be null"); + } + list.Add(ParseSingleValue(field, tokenizer)); + } + } + + private void MergeMapField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer) + { + // Map fields are always objects, even if the values are well-known types: ParseSingleValue handles those. + var token = tokenizer.Next(); + if (token.Type != JsonToken.TokenType.StartObject) + { + throw new InvalidProtocolBufferException("Expected an object to populate a map"); + } + + var type = field.MessageType; + var keyField = type.FindFieldByNumber(1); + var valueField = type.FindFieldByNumber(2); + if (keyField == null || valueField == null) + { + throw new InvalidProtocolBufferException("Invalid map field: " + field.FullName); + } + IDictionary dictionary = (IDictionary) field.Accessor.GetValue(message); + + while (true) + { + token = tokenizer.Next(); + if (token.Type == JsonToken.TokenType.EndObject) + { + return; + } + object key = ParseMapKey(keyField, token.StringValue); + object value = ParseSingleValue(valueField, tokenizer); + if (value == null) + { + throw new InvalidProtocolBufferException("Map values must not be null"); + } + dictionary[key] = value; + } + } + + private static bool IsGoogleProtobufValueField(FieldDescriptor field) + { + return field.FieldType == FieldType.Message && + field.MessageType.FullName == Value.Descriptor.FullName; + } + + private object ParseSingleValue(FieldDescriptor field, JsonTokenizer tokenizer) + { + var token = tokenizer.Next(); + if (token.Type == JsonToken.TokenType.Null) + { + // TODO: In order to support dynamic messages, we should really build this up + // dynamically. + if (IsGoogleProtobufValueField(field)) + { + return Value.ForNull(); + } + return null; + } + + var fieldType = field.FieldType; + if (fieldType == FieldType.Message) + { + // Parse wrapper types as their constituent types. + // TODO: What does this mean for null? + if (field.MessageType.IsWrapperType) + { + field = field.MessageType.Fields[WrappersReflection.WrapperValueFieldNumber]; + fieldType = field.FieldType; + } + else + { + // TODO: Merge the current value in message? (Public API currently doesn't make this relevant as we don't expose merging.) + tokenizer.PushBack(token); + IMessage subMessage = NewMessageForField(field); + Merge(subMessage, tokenizer); + return subMessage; + } + } + + switch (token.Type) + { + case JsonToken.TokenType.True: + case JsonToken.TokenType.False: + if (fieldType == FieldType.Bool) + { + return token.Type == JsonToken.TokenType.True; + } + // Fall through to "we don't support this type for this case"; could duplicate the behaviour of the default + // case instead, but this way we'd only need to change one place. + goto default; + case JsonToken.TokenType.StringValue: + return ParseSingleStringValue(field, token.StringValue); + // Note: not passing the number value itself here, as we may end up storing the string value in the token too. + case JsonToken.TokenType.Number: + return ParseSingleNumberValue(field, token); + case JsonToken.TokenType.Null: + throw new NotImplementedException("Haven't worked out what to do for null yet"); + default: + throw new InvalidProtocolBufferException("Unsupported JSON token type " + token.Type + " for field type " + fieldType); + } + } + + /// <summary> + /// Parses <paramref name="json"/> into a new message. + /// </summary> + /// <typeparam name="T">The type of message to create.</typeparam> + /// <param name="json">The JSON to parse.</param> + /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception> + /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception> + public T Parse<T>(string json) where T : IMessage, new() + { + ProtoPreconditions.CheckNotNull(json, nameof(json)); + return Parse<T>(new StringReader(json)); + } + + /// <summary> + /// Parses JSON read from <paramref name="jsonReader"/> into a new message. + /// </summary> + /// <typeparam name="T">The type of message to create.</typeparam> + /// <param name="jsonReader">Reader providing the JSON to parse.</param> + /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception> + /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception> + public T Parse<T>(TextReader jsonReader) where T : IMessage, new() + { + ProtoPreconditions.CheckNotNull(jsonReader, nameof(jsonReader)); + T message = new T(); + Merge(message, jsonReader); + return message; + } + + /// <summary> + /// Parses <paramref name="json"/> into a new message. + /// </summary> + /// <param name="json">The JSON to parse.</param> + /// <param name="descriptor">Descriptor of message type to parse.</param> + /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception> + /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception> + public IMessage Parse(string json, MessageDescriptor descriptor) + { + ProtoPreconditions.CheckNotNull(json, nameof(json)); + ProtoPreconditions.CheckNotNull(descriptor, nameof(descriptor)); + return Parse(new StringReader(json), descriptor); + } + + /// <summary> + /// Parses JSON read from <paramref name="jsonReader"/> into a new message. + /// </summary> + /// <param name="jsonReader">Reader providing the JSON to parse.</param> + /// <param name="descriptor">Descriptor of message type to parse.</param> + /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception> + /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception> + public IMessage Parse(TextReader jsonReader, MessageDescriptor descriptor) + { + ProtoPreconditions.CheckNotNull(jsonReader, nameof(jsonReader)); + ProtoPreconditions.CheckNotNull(descriptor, nameof(descriptor)); + IMessage message = descriptor.Parser.CreateTemplate(); + Merge(message, jsonReader); + return message; + } + + private void MergeStructValue(IMessage message, JsonTokenizer tokenizer) + { + var firstToken = tokenizer.Next(); + var fields = message.Descriptor.Fields; + switch (firstToken.Type) + { + case JsonToken.TokenType.Null: + fields[Value.NullValueFieldNumber].Accessor.SetValue(message, 0); + return; + case JsonToken.TokenType.StringValue: + fields[Value.StringValueFieldNumber].Accessor.SetValue(message, firstToken.StringValue); + return; + case JsonToken.TokenType.Number: + fields[Value.NumberValueFieldNumber].Accessor.SetValue(message, firstToken.NumberValue); + return; + case JsonToken.TokenType.False: + case JsonToken.TokenType.True: + fields[Value.BoolValueFieldNumber].Accessor.SetValue(message, firstToken.Type == JsonToken.TokenType.True); + return; + case JsonToken.TokenType.StartObject: + { + var field = fields[Value.StructValueFieldNumber]; + var structMessage = NewMessageForField(field); + tokenizer.PushBack(firstToken); + Merge(structMessage, tokenizer); + field.Accessor.SetValue(message, structMessage); + return; + } + case JsonToken.TokenType.StartArray: + { + var field = fields[Value.ListValueFieldNumber]; + var list = NewMessageForField(field); + tokenizer.PushBack(firstToken); + Merge(list, tokenizer); + field.Accessor.SetValue(message, list); + return; + } + default: + throw new InvalidOperationException("Unexpected token type: " + firstToken.Type); + } + } + + private void MergeStruct(IMessage message, JsonTokenizer tokenizer) + { + var token = tokenizer.Next(); + if (token.Type != JsonToken.TokenType.StartObject) + { + throw new InvalidProtocolBufferException("Expected object value for Struct"); + } + tokenizer.PushBack(token); + + var field = message.Descriptor.Fields[Struct.FieldsFieldNumber]; + MergeMapField(message, field, tokenizer); + } + + private void MergeAny(IMessage message, JsonTokenizer tokenizer) + { + // Record the token stream until we see the @type property. At that point, we can take the value, consult + // the type registry for the relevant message, and replay the stream, omitting the @type property. + var tokens = new List<JsonToken>(); + + var token = tokenizer.Next(); + if (token.Type != JsonToken.TokenType.StartObject) + { + throw new InvalidProtocolBufferException("Expected object value for Any"); + } + int typeUrlObjectDepth = tokenizer.ObjectDepth; + + // The check for the property depth protects us from nested Any values which occur before the type URL + // for *this* Any. + while (token.Type != JsonToken.TokenType.Name || + token.StringValue != JsonFormatter.AnyTypeUrlField || + tokenizer.ObjectDepth != typeUrlObjectDepth) + { + tokens.Add(token); + token = tokenizer.Next(); + + if (tokenizer.ObjectDepth < typeUrlObjectDepth) + { + throw new InvalidProtocolBufferException("Any message with no @type"); + } + } + + // Don't add the @type property or its value to the recorded token list + token = tokenizer.Next(); + if (token.Type != JsonToken.TokenType.StringValue) + { + throw new InvalidProtocolBufferException("Expected string value for Any.@type"); + } + string typeUrl = token.StringValue; + string typeName = Any.GetTypeName(typeUrl); + + MessageDescriptor descriptor = settings.TypeRegistry.Find(typeName); + if (descriptor == null) + { + throw new InvalidOperationException($"Type registry has no descriptor for type name '{typeName}'"); + } + + // Now replay the token stream we've already read and anything that remains of the object, just parsing it + // as normal. Our original tokenizer should end up at the end of the object. + var replay = JsonTokenizer.FromReplayedTokens(tokens, tokenizer); + var body = descriptor.Parser.CreateTemplate(); + if (descriptor.IsWellKnownType) + { + MergeWellKnownTypeAnyBody(body, replay); + } + else + { + Merge(body, replay); + } + var data = body.ToByteString(); + + // Now that we have the message data, we can pack it into an Any (the message received as a parameter). + message.Descriptor.Fields[Any.TypeUrlFieldNumber].Accessor.SetValue(message, typeUrl); + message.Descriptor.Fields[Any.ValueFieldNumber].Accessor.SetValue(message, data); + } + + // Well-known types end up in a property called "value" in the JSON. As there's no longer a @type property + // in the given JSON token stream, we should *only* have tokens of start-object, name("value"), the value + // itself, and then end-object. + private void MergeWellKnownTypeAnyBody(IMessage body, JsonTokenizer tokenizer) + { + var token = tokenizer.Next(); // Definitely start-object; checked in previous method + token = tokenizer.Next(); + // TODO: What about an absent Int32Value, for example? + if (token.Type != JsonToken.TokenType.Name || token.StringValue != JsonFormatter.AnyWellKnownTypeValueField) + { + throw new InvalidProtocolBufferException($"Expected '{JsonFormatter.AnyWellKnownTypeValueField}' property for well-known type Any body"); + } + Merge(body, tokenizer); + token = tokenizer.Next(); + if (token.Type != JsonToken.TokenType.EndObject) + { + throw new InvalidProtocolBufferException($"Expected end-object token after @type/value for well-known type"); + } + } + + #region Utility methods which don't depend on the state (or settings) of the parser. + private static object ParseMapKey(FieldDescriptor field, string keyText) + { + switch (field.FieldType) + { + case FieldType.Bool: + if (keyText == "true") + { + return true; + } + if (keyText == "false") + { + return false; + } + throw new InvalidProtocolBufferException("Invalid string for bool map key: " + keyText); + case FieldType.String: + return keyText; + case FieldType.Int32: + case FieldType.SInt32: + case FieldType.SFixed32: + return ParseNumericString(keyText, int.Parse); + case FieldType.UInt32: + case FieldType.Fixed32: + return ParseNumericString(keyText, uint.Parse); + case FieldType.Int64: + case FieldType.SInt64: + case FieldType.SFixed64: + return ParseNumericString(keyText, long.Parse); + case FieldType.UInt64: + case FieldType.Fixed64: + return ParseNumericString(keyText, ulong.Parse); + default: + throw new InvalidProtocolBufferException("Invalid field type for map: " + field.FieldType); + } + } + + private static object ParseSingleNumberValue(FieldDescriptor field, JsonToken token) + { + double value = token.NumberValue; + checked + { + try + { + switch (field.FieldType) + { + case FieldType.Int32: + case FieldType.SInt32: + case FieldType.SFixed32: + CheckInteger(value); + return (int) value; + case FieldType.UInt32: + case FieldType.Fixed32: + CheckInteger(value); + return (uint) value; + case FieldType.Int64: + case FieldType.SInt64: + case FieldType.SFixed64: + CheckInteger(value); + return (long) value; + case FieldType.UInt64: + case FieldType.Fixed64: + CheckInteger(value); + return (ulong) value; + case FieldType.Double: + return value; + case FieldType.Float: + if (double.IsNaN(value)) + { + return float.NaN; + } + if (value > float.MaxValue || value < float.MinValue) + { + if (double.IsPositiveInfinity(value)) + { + return float.PositiveInfinity; + } + if (double.IsNegativeInfinity(value)) + { + return float.NegativeInfinity; + } + throw new InvalidProtocolBufferException($"Value out of range: {value}"); + } + return (float) value; + case FieldType.Enum: + CheckInteger(value); + // Just return it as an int, and let the CLR convert it. + // Note that we deliberately don't check that it's a known value. + return (int) value; + default: + throw new InvalidProtocolBufferException($"Unsupported conversion from JSON number for field type {field.FieldType}"); + } + } + catch (OverflowException) + { + throw new InvalidProtocolBufferException($"Value out of range: {value}"); + } + } + } + + private static void CheckInteger(double value) + { + if (double.IsInfinity(value) || double.IsNaN(value)) + { + throw new InvalidProtocolBufferException($"Value not an integer: {value}"); + } + if (value != Math.Floor(value)) + { + throw new InvalidProtocolBufferException($"Value not an integer: {value}"); + } + } + + private static object ParseSingleStringValue(FieldDescriptor field, string text) + { + switch (field.FieldType) + { + case FieldType.String: + return text; + case FieldType.Bytes: + try + { + return ByteString.FromBase64(text); + } + catch (FormatException e) + { + throw InvalidProtocolBufferException.InvalidBase64(e); + } + case FieldType.Int32: + case FieldType.SInt32: + case FieldType.SFixed32: + return ParseNumericString(text, int.Parse); + case FieldType.UInt32: + case FieldType.Fixed32: + return ParseNumericString(text, uint.Parse); + case FieldType.Int64: + case FieldType.SInt64: + case FieldType.SFixed64: + return ParseNumericString(text, long.Parse); + case FieldType.UInt64: + case FieldType.Fixed64: + return ParseNumericString(text, ulong.Parse); + case FieldType.Double: + double d = ParseNumericString(text, double.Parse); + ValidateInfinityAndNan(text, double.IsPositiveInfinity(d), double.IsNegativeInfinity(d), double.IsNaN(d)); + return d; + case FieldType.Float: + float f = ParseNumericString(text, float.Parse); + ValidateInfinityAndNan(text, float.IsPositiveInfinity(f), float.IsNegativeInfinity(f), float.IsNaN(f)); + return f; + case FieldType.Enum: + var enumValue = field.EnumType.FindValueByName(text); + if (enumValue == null) + { + throw new InvalidProtocolBufferException($"Invalid enum value: {text} for enum type: {field.EnumType.FullName}"); + } + // Just return it as an int, and let the CLR convert it. + return enumValue.Number; + default: + throw new InvalidProtocolBufferException($"Unsupported conversion from JSON string for field type {field.FieldType}"); + } + } + + /// <summary> + /// Creates a new instance of the message type for the given field. + /// </summary> + private static IMessage NewMessageForField(FieldDescriptor field) + { + return field.MessageType.Parser.CreateTemplate(); + } + + private static T ParseNumericString<T>(string text, Func<string, NumberStyles, IFormatProvider, T> parser) + { + // Can't prohibit this with NumberStyles. + if (text.StartsWith("+")) + { + throw new InvalidProtocolBufferException($"Invalid numeric value: {text}"); + } + if (text.StartsWith("0") && text.Length > 1) + { + if (text[1] >= '0' && text[1] <= '9') + { + throw new InvalidProtocolBufferException($"Invalid numeric value: {text}"); + } + } + else if (text.StartsWith("-0") && text.Length > 2) + { + if (text[2] >= '0' && text[2] <= '9') + { + throw new InvalidProtocolBufferException($"Invalid numeric value: {text}"); + } + } + try + { + return parser(text, NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent, CultureInfo.InvariantCulture); + } + catch (FormatException) + { + throw new InvalidProtocolBufferException($"Invalid numeric value for type: {text}"); + } + catch (OverflowException) + { + throw new InvalidProtocolBufferException($"Value out of range: {text}"); + } + } + + /// <summary> + /// Checks that any infinite/NaN values originated from the correct text. + /// This corrects the lenient whitespace handling of double.Parse/float.Parse, as well as the + /// way that Mono parses out-of-range values as infinity. + /// </summary> + private static void ValidateInfinityAndNan(string text, bool isPositiveInfinity, bool isNegativeInfinity, bool isNaN) + { + if ((isPositiveInfinity && text != "Infinity") || + (isNegativeInfinity && text != "-Infinity") || + (isNaN && text != "NaN")) + { + throw new InvalidProtocolBufferException($"Invalid numeric value: {text}"); + } + } + + private static void MergeTimestamp(IMessage message, JsonToken token) + { + if (token.Type != JsonToken.TokenType.StringValue) + { + throw new InvalidProtocolBufferException("Expected string value for Timestamp"); + } + var match = TimestampRegex.Match(token.StringValue); + if (!match.Success) + { + throw new InvalidProtocolBufferException($"Invalid Timestamp value: {token.StringValue}"); + } + var dateTime = match.Groups["datetime"].Value; + var subseconds = match.Groups["subseconds"].Value; + var offset = match.Groups["offset"].Value; + + try + { + DateTime parsed = DateTime.ParseExact( + dateTime, + "yyyy-MM-dd'T'HH:mm:ss", + CultureInfo.InvariantCulture, + DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal); + // TODO: It would be nice not to have to create all these objects... easy to optimize later though. + Timestamp timestamp = Timestamp.FromDateTime(parsed); + int nanosToAdd = 0; + if (subseconds != "") + { + // This should always work, as we've got 1-9 digits. + int parsedFraction = int.Parse(subseconds.Substring(1), CultureInfo.InvariantCulture); + nanosToAdd = parsedFraction * SubsecondScalingFactors[subseconds.Length]; + } + int secondsToAdd = 0; + if (offset != "Z") + { + // This is the amount we need to *subtract* from the local time to get to UTC - hence - => +1 and vice versa. + int sign = offset[0] == '-' ? 1 : -1; + int hours = int.Parse(offset.Substring(1, 2), CultureInfo.InvariantCulture); + int minutes = int.Parse(offset.Substring(4, 2)); + int totalMinutes = hours * 60 + minutes; + if (totalMinutes > 18 * 60) + { + throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue); + } + if (totalMinutes == 0 && sign == 1) + { + // This is an offset of -00:00, which means "unknown local offset". It makes no sense for a timestamp. + throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue); + } + // We need to *subtract* the offset from local time to get UTC. + secondsToAdd = sign * totalMinutes * 60; + } + // Ensure we've got the right signs. Currently unnecessary, but easy to do. + if (secondsToAdd < 0 && nanosToAdd > 0) + { + secondsToAdd++; + nanosToAdd = nanosToAdd - Duration.NanosecondsPerSecond; + } + if (secondsToAdd != 0 || nanosToAdd != 0) + { + timestamp += new Duration { Nanos = nanosToAdd, Seconds = secondsToAdd }; + // The resulting timestamp after offset change would be out of our expected range. Currently the Timestamp message doesn't validate this + // anywhere, but we shouldn't parse it. + if (timestamp.Seconds < Timestamp.UnixSecondsAtBclMinValue || timestamp.Seconds > Timestamp.UnixSecondsAtBclMaxValue) + { + throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue); + } + } + message.Descriptor.Fields[Timestamp.SecondsFieldNumber].Accessor.SetValue(message, timestamp.Seconds); + message.Descriptor.Fields[Timestamp.NanosFieldNumber].Accessor.SetValue(message, timestamp.Nanos); + } + catch (FormatException) + { + throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue); + } + } + + private static void MergeDuration(IMessage message, JsonToken token) + { + if (token.Type != JsonToken.TokenType.StringValue) + { + throw new InvalidProtocolBufferException("Expected string value for Duration"); + } + var match = DurationRegex.Match(token.StringValue); + if (!match.Success) + { + throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue); + } + var sign = match.Groups["sign"].Value; + var secondsText = match.Groups["int"].Value; + // Prohibit leading insignficant zeroes + if (secondsText[0] == '0' && secondsText.Length > 1) + { + throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue); + } + var subseconds = match.Groups["subseconds"].Value; + var multiplier = sign == "-" ? -1 : 1; + + try + { + long seconds = long.Parse(secondsText, CultureInfo.InvariantCulture) * multiplier; + int nanos = 0; + if (subseconds != "") + { + // This should always work, as we've got 1-9 digits. + int parsedFraction = int.Parse(subseconds.Substring(1)); + nanos = parsedFraction * SubsecondScalingFactors[subseconds.Length] * multiplier; + } + if (!Duration.IsNormalized(seconds, nanos)) + { + throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}"); + } + message.Descriptor.Fields[Duration.SecondsFieldNumber].Accessor.SetValue(message, seconds); + message.Descriptor.Fields[Duration.NanosFieldNumber].Accessor.SetValue(message, nanos); + } + catch (FormatException) + { + throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}"); + } + } + + private static void MergeFieldMask(IMessage message, JsonToken token) + { + if (token.Type != JsonToken.TokenType.StringValue) + { + throw new InvalidProtocolBufferException("Expected string value for FieldMask"); + } + // TODO: Do we *want* to remove empty entries? Probably okay to treat "" as "no paths", but "foo,,bar"? + string[] jsonPaths = token.StringValue.Split(FieldMaskPathSeparators, StringSplitOptions.RemoveEmptyEntries); + IList messagePaths = (IList) message.Descriptor.Fields[FieldMask.PathsFieldNumber].Accessor.GetValue(message); + foreach (var path in jsonPaths) + { + messagePaths.Add(ToSnakeCase(path)); + } + } + + // Ported from src/google/protobuf/util/internal/utility.cc + private static string ToSnakeCase(string text) + { + var builder = new StringBuilder(text.Length * 2); + // Note: this is probably unnecessary now, but currently retained to be as close as possible to the + // C++, whilst still throwing an exception on underscores. + bool wasNotUnderscore = false; // Initialize to false for case 1 (below) + bool wasNotCap = false; + + for (int i = 0; i < text.Length; i++) + { + char c = text[i]; + if (c >= 'A' && c <= 'Z') // ascii_isupper + { + // Consider when the current character B is capitalized: + // 1) At beginning of input: "B..." => "b..." + // (e.g. "Biscuit" => "biscuit") + // 2) Following a lowercase: "...aB..." => "...a_b..." + // (e.g. "gBike" => "g_bike") + // 3) At the end of input: "...AB" => "...ab" + // (e.g. "GoogleLAB" => "google_lab") + // 4) Followed by a lowercase: "...ABc..." => "...a_bc..." + // (e.g. "GBike" => "g_bike") + if (wasNotUnderscore && // case 1 out + (wasNotCap || // case 2 in, case 3 out + (i + 1 < text.Length && // case 3 out + (text[i + 1] >= 'a' && text[i + 1] <= 'z')))) // ascii_islower(text[i + 1]) + { // case 4 in + // We add an underscore for case 2 and case 4. + builder.Append('_'); + } + // ascii_tolower, but we already know that c *is* an upper case ASCII character... + builder.Append((char) (c + 'a' - 'A')); + wasNotUnderscore = true; + wasNotCap = false; + } + else + { + builder.Append(c); + if (c == '_') + { + throw new InvalidProtocolBufferException($"Invalid field mask: {text}"); + } + wasNotUnderscore = true; + wasNotCap = true; + } + } + return builder.ToString(); + } + #endregion + + /// <summary> + /// Settings controlling JSON parsing. + /// </summary> + public sealed class Settings + { + /// <summary> + /// Default settings, as used by <see cref="JsonParser.Default"/>. This has the same default + /// recursion limit as <see cref="CodedInputStream"/>, and an empty type registry. + /// </summary> + public static Settings Default { get; } + + // Workaround for the Mono compiler complaining about XML comments not being on + // valid language elements. + static Settings() + { + Default = new Settings(CodedInputStream.DefaultRecursionLimit); + } + + /// <summary> + /// The maximum depth of messages to parse. Note that this limit only applies to parsing + /// messages, not collections - so a message within a collection within a message only counts as + /// depth 2, not 3. + /// </summary> + public int RecursionLimit { get; } + + /// <summary> + /// The type registry used to parse <see cref="Any"/> messages. + /// </summary> + public TypeRegistry TypeRegistry { get; } + + /// <summary> + /// Creates a new <see cref="Settings"/> object with the specified recursion limit. + /// </summary> + /// <param name="recursionLimit">The maximum depth of messages to parse</param> + public Settings(int recursionLimit) : this(recursionLimit, TypeRegistry.Empty) + { + } + + /// <summary> + /// Creates a new <see cref="Settings"/> object with the specified recursion limit and type registry. + /// </summary> + /// <param name="recursionLimit">The maximum depth of messages to parse</param> + /// <param name="typeRegistry">The type registry used to parse <see cref="Any"/> messages</param> + public Settings(int recursionLimit, TypeRegistry typeRegistry) + { + RecursionLimit = recursionLimit; + TypeRegistry = ProtoPreconditions.CheckNotNull(typeRegistry, nameof(typeRegistry)); + } + } + } +} |