From 3de2fced6be1cc5e8f321c5aee2bb43176be962a Mon Sep 17 00:00:00 2001
From: Jon Skeet
Date: Mon, 23 Nov 2015 16:21:47 +0000
Subject: Handle JSON parsing for Any.

This required a rework of the tokenizer to allow for a "replaying" tokenizer,
basically in case the @type value comes after the data itself. This rework is
nice in some ways (all the pushback and object depth logic in one place) but
is a little fragile in terms of token push-back when using the replay
tokenizer. It'll be fine for the scenario we need it for, but we should be
careful...
---
 csharp/src/Google.Protobuf/JsonParser.cs | 149 ++++++++++++++++++++++++++++---
 1 file changed, 139 insertions(+), 10 deletions(-)

(limited to 'csharp/src/Google.Protobuf/JsonParser.cs')

diff --git a/csharp/src/Google.Protobuf/JsonParser.cs b/csharp/src/Google.Protobuf/JsonParser.cs
index 2019029b..95f9ad35 100644
--- a/csharp/src/Google.Protobuf/JsonParser.cs
+++ b/csharp/src/Google.Protobuf/JsonParser.cs
@@ -77,6 +77,7 @@ namespace Google.Protobuf
 { ListValue.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeRepeatedField(message, message.Descriptor.Fields[ListValue.ValuesFieldNumber], tokenizer) },
 { Struct.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeStruct(message, tokenizer) },
+{ Any.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeAny(message, tokenizer) },
 { FieldMask.Descriptor.FullName, (parser, message, tokenizer) => MergeFieldMask(message, tokenizer.Next()) },
 { Int32Value.Descriptor.FullName, MergeWrapperField },
 { Int64Value.Descriptor.FullName, MergeWrapperField },
@@ -128,7 +129,7 @@ namespace Google.Protobuf
 /// <param name="jsonReader">Reader providing the JSON to parse.</param>
 internal void Merge(IMessage message, TextReader jsonReader)
 {
-    var tokenizer = new JsonTokenizer(jsonReader);
+    var tokenizer = JsonTokenizer.FromTextReader(jsonReader);
     Merge(message, tokenizer);
     var lastToken = tokenizer.Next();
     if (lastToken != JsonToken.EndDocument)
@@ -338,6 +339,7 @@ namespace Google.Protobuf
 /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
 public T Parse<T>(string json) where T : IMessage, new()
 {
+    Preconditions.CheckNotNull(json, nameof(json));
     return Parse<T>(new StringReader(json));
 }
@@ -350,11 +352,42 @@ namespace Google.Protobuf
 /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
 public T Parse<T>(TextReader jsonReader) where T : IMessage, new()
 {
+    Preconditions.CheckNotNull(jsonReader, nameof(jsonReader));
     T message = new T();
     Merge(message, jsonReader);
     return message;
 }
+
+/// <summary>
+/// Parses <paramref name="json"/> into a new message.
+/// </summary>
+/// <param name="json">The JSON to parse.</param>
+/// <param name="descriptor">Descriptor of message type to parse.</param>
+/// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
+/// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
+public IMessage Parse(string json, MessageDescriptor descriptor)
+{
+    Preconditions.CheckNotNull(json, nameof(json));
+    Preconditions.CheckNotNull(descriptor, nameof(descriptor));
+    return Parse(new StringReader(json), descriptor);
+}
+
+/// <summary>
+/// Parses JSON read from <paramref name="jsonReader"/> into a new message.
+/// </summary>
+/// <param name="jsonReader">Reader providing the JSON to parse.</param>
+/// <param name="descriptor">Descriptor of message type to parse.</param>
+/// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
+/// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
+public IMessage Parse(TextReader jsonReader, MessageDescriptor descriptor)
+{
+    Preconditions.CheckNotNull(jsonReader, nameof(jsonReader));
+    Preconditions.CheckNotNull(descriptor, nameof(descriptor));
+    IMessage message = descriptor.Parser.CreateTemplate();
+    Merge(message, jsonReader);
+    return message;
+}
+
 private void MergeStructValue(IMessage message, JsonTokenizer tokenizer)
 {
     var firstToken = tokenizer.Next();
@@ -410,6 +443,83 @@ namespace Google.Protobuf
     MergeMapField(message, field, tokenizer);
 }
+
+private void MergeAny(IMessage message, JsonTokenizer tokenizer)
+{
+    // Record the token stream until we see the @type property. At that point, we can take the value, consult
+    // the type registry for the relevant message, and replay the stream, omitting the @type property.
+    var tokens = new List<JsonToken>();
+
+    var token = tokenizer.Next();
+    if (token.Type != JsonToken.TokenType.StartObject)
+    {
+        throw new InvalidProtocolBufferException("Expected object value for Any");
+    }
+    int typeUrlObjectDepth = tokenizer.ObjectDepth;
+
+    // The check for the property depth protects us from nested Any values which occur before the type URL
+    // for *this* Any.
+    while (token.Type != JsonToken.TokenType.Name ||
+        token.StringValue != JsonFormatter.AnyTypeUrlField ||
+        tokenizer.ObjectDepth != typeUrlObjectDepth)
+    {
+        tokens.Add(token);
+        token = tokenizer.Next();
+    }
+
+    // Don't add the @type property or its value to the recorded token list
+    token = tokenizer.Next();
+    if (token.Type != JsonToken.TokenType.StringValue)
+    {
+        throw new InvalidProtocolBufferException("Expected string value for Any.@type");
+    }
+    string typeUrl = token.StringValue;
+    string typeName = JsonFormatter.GetTypeName(typeUrl);
+
+    MessageDescriptor descriptor = settings.TypeRegistry.Find(typeName);
+    if (descriptor == null)
+    {
+        throw new InvalidOperationException($"Type registry has no descriptor for type name '{typeName}'");
+    }
+
+    // Now replay the token stream we've already read and anything that remains of the object, just parsing it
+    // as normal. Our original tokenizer should end up at the end of the object.
+    var replay = JsonTokenizer.FromReplayedTokens(tokens, tokenizer);
+    var body = descriptor.Parser.CreateTemplate();
+    if (descriptor.IsWellKnownType)
+    {
+        MergeWellKnownTypeAnyBody(body, replay);
+    }
+    else
+    {
+        Merge(body, replay);
+    }
+    var data = body.ToByteString();
+
+    // Now that we have the message data, we can pack it into an Any (the message received as a parameter).
+    message.Descriptor.Fields[Any.TypeUrlFieldNumber].Accessor.SetValue(message, typeUrl);
+    message.Descriptor.Fields[Any.ValueFieldNumber].Accessor.SetValue(message, data);
+}
+
+// Well-known types end up in a property called "value" in the JSON. As there's no longer a @type property
+// in the given JSON token stream, we should *only* have tokens of start-object, name("value"), the value
+// itself, and then end-object.
+private void MergeWellKnownTypeAnyBody(IMessage body, JsonTokenizer tokenizer)
+{
+    var token = tokenizer.Next(); // Definitely start-object; checked in previous method
+    token = tokenizer.Next();
+    // TODO: What about an absent Int32Value, for example?
+    if (token.Type != JsonToken.TokenType.Name || token.StringValue != JsonFormatter.AnyWellKnownTypeValueField)
+    {
+        throw new InvalidProtocolBufferException($"Expected '{JsonFormatter.AnyWellKnownTypeValueField}' property for well-known type Any body");
+    }
+    Merge(body, tokenizer);
+    token = tokenizer.Next();
+    if (token.Type != JsonToken.TokenType.EndObject)
+    {
+        throw new InvalidProtocolBufferException($"Expected end-object token after @type/value for well-known type");
+    }
+}
+
 #region Utility methods which don't depend on the state (or settings) of the parser.
 private static object ParseMapKey(FieldDescriptor field, string keyText)
 {
@@ -789,29 +899,48 @@ namespace Google.Protobuf
 /// </summary>
 public sealed class Settings
 {
-    private static readonly Settings defaultInstance = new Settings(CodedInputStream.DefaultRecursionLimit);
-
-    private readonly int recursionLimit;
-
     /// <summary>
-    /// Default settings, as used by <see cref="JsonParser.Default"/>
+    /// Default settings, as used by <see cref="JsonParser.Default"/>. This has the same default
+    /// recursion limit as <see cref="CodedInputStream"/>, and an empty type registry.
     /// </summary>
-    public static Settings Default { get { return defaultInstance; } }
+    public static Settings Default { get; }
+
+    // Workaround for the Mono compiler complaining about XML comments not being on
+    // valid language elements.
+    static Settings()
+    {
+        Default = new Settings(CodedInputStream.DefaultRecursionLimit);
+    }

     /// <summary>
     /// The maximum depth of messages to parse. Note that this limit only applies to parsing
     /// messages, not collections - so a message within a collection within a message only counts as
     /// depth 2, not 3.
     /// </summary>
-    public int RecursionLimit { get { return recursionLimit; } }
+    public int RecursionLimit { get; }
+
+    /// <summary>
+    /// The type registry used to parse messages.
+    /// </summary>
+    public TypeRegistry TypeRegistry { get; }

     /// <summary>
     /// Creates a new <see cref="Settings"/> object with the specified recursion limit.
     /// </summary>
     /// <param name="recursionLimit">The maximum depth of messages to parse</param>
-    public Settings(int recursionLimit)
+    public Settings(int recursionLimit) : this(recursionLimit, TypeRegistry.Empty)
+    {
+    }
+
+    /// <summary>
+    /// Creates a new <see cref="Settings"/> object with the specified recursion limit and type registry.
+    /// </summary>
+    /// <param name="recursionLimit">The maximum depth of messages to parse</param>
+    /// <param name="typeRegistry">The type registry used to parse messages</param>
+    public Settings(int recursionLimit, TypeRegistry typeRegistry)
     {
-        this.recursionLimit = recursionLimit;
+        RecursionLimit = recursionLimit;
+        TypeRegistry = Preconditions.CheckNotNull(typeRegistry, nameof(typeRegistry));
     }
 }
 }
}
--
cgit v1.2.3
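
For context, here is a minimal caller-side sketch of how the new Any handling and the Settings/TypeRegistry constructor might be exercised. It is not part of this commit; it assumes the TypeRegistry.FromMessages factory and the Any.Unpack<T> helper are available at this revision, and the recursion limit of 64 and the sample type URL are illustrative only.

    // Sketch only: parse an Any whose @type property comes *after* the payload,
    // which is exactly the case the replaying tokenizer in this commit handles.
    using System;
    using Google.Protobuf;
    using Google.Protobuf.WellKnownTypes;

    class AnyJsonParsingDemo
    {
        static void Main()
        {
            // Any parsing needs a registry mapping type names to descriptors;
            // an unknown @type makes the parser throw.
            TypeRegistry registry = TypeRegistry.FromMessages(Duration.Descriptor);

            // The new Settings constructor takes the registry alongside the
            // recursion limit (64 is just an illustrative value).
            var parser = new JsonParser(new JsonParser.Settings(64, registry));

            // "value" precedes "@type": the recorded tokens are replayed once
            // the type URL has been seen.
            string json = "{ \"value\": \"1.5s\", \"@type\": \"type.googleapis.com/google.protobuf.Duration\" }";
            Any any = parser.Parse<Any>(json);

            // Unpack the payload back into a Duration (assumes Any.Unpack<T> exists here).
            Duration duration = any.Unpack<Duration>();
            Console.WriteLine(duration);  // prints the duration's JSON form, e.g. "1.500s"
        }
    }

Note that the registry must contain even well-known types such as Duration: MergeAny looks every type URL up via settings.TypeRegistry.Find before deciding whether to take the well-known-type "value" path or the regular merge path.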