diff options
Diffstat (limited to 'csharp/src/Google.Protobuf')
-rw-r--r-- | csharp/src/Google.Protobuf/FieldCodec.cs | 24 | ||||
-rw-r--r-- | csharp/src/Google.Protobuf/Google.Protobuf.csproj | 5 | ||||
-rw-r--r-- | csharp/src/Google.Protobuf/JsonFormatter.cs | 15 | ||||
-rw-r--r-- | csharp/src/Google.Protobuf/JsonParser.cs | 813 | ||||
-rw-r--r-- | csharp/src/Google.Protobuf/JsonToken.cs | 166 | ||||
-rw-r--r-- | csharp/src/Google.Protobuf/JsonTokenizer.cs | 633 | ||||
-rw-r--r-- | csharp/src/Google.Protobuf/MessageParser.cs | 12 | ||||
-rw-r--r-- | csharp/src/Google.Protobuf/Properties/AssemblyInfo.cs | 3 | ||||
-rw-r--r-- | csharp/src/Google.Protobuf/WellKnownTypes/DurationPartial.cs | 10 | ||||
-rw-r--r-- | csharp/src/Google.Protobuf/WellKnownTypes/TimestampPartial.cs | 2 | ||||
-rw-r--r-- | csharp/src/Google.Protobuf/WellKnownTypes/ValuePartial.cs | 99 | ||||
-rw-r--r-- | csharp/src/Google.Protobuf/WellKnownTypes/WrappersPartial.cs | 42 |
12 files changed, 1808 insertions, 16 deletions
diff --git a/csharp/src/Google.Protobuf/FieldCodec.cs b/csharp/src/Google.Protobuf/FieldCodec.cs index 20a1f438..54cbc209 100644 --- a/csharp/src/Google.Protobuf/FieldCodec.cs +++ b/csharp/src/Google.Protobuf/FieldCodec.cs @@ -30,6 +30,7 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endregion +using Google.Protobuf.WellKnownTypes; using System; using System.Collections.Generic; @@ -261,20 +262,17 @@ namespace Google.Protobuf /// </remarks> private static class WrapperCodecs { - // All the field numbers are the same (1). - private const int WrapperValueFieldNumber = Google.Protobuf.WellKnownTypes.Int32Value.ValueFieldNumber; - - private static readonly Dictionary<Type, object> Codecs = new Dictionary<Type, object> + private static readonly Dictionary<System.Type, object> Codecs = new Dictionary<System.Type, object> { - { typeof(bool), ForBool(WireFormat.MakeTag(WrapperValueFieldNumber, WireFormat.WireType.Varint)) }, - { typeof(int), ForInt32(WireFormat.MakeTag(WrapperValueFieldNumber, WireFormat.WireType.Varint)) }, - { typeof(long), ForInt64(WireFormat.MakeTag(WrapperValueFieldNumber, WireFormat.WireType.Varint)) }, - { typeof(uint), ForUInt32(WireFormat.MakeTag(WrapperValueFieldNumber, WireFormat.WireType.Varint)) }, - { typeof(ulong), ForUInt64(WireFormat.MakeTag(WrapperValueFieldNumber, WireFormat.WireType.Varint)) }, - { typeof(float), ForFloat(WireFormat.MakeTag(WrapperValueFieldNumber, WireFormat.WireType.Fixed32)) }, - { typeof(double), ForDouble(WireFormat.MakeTag(WrapperValueFieldNumber, WireFormat.WireType.Fixed64)) }, - { typeof(string), ForString(WireFormat.MakeTag(WrapperValueFieldNumber, WireFormat.WireType.LengthDelimited)) }, - { typeof(ByteString), ForBytes(WireFormat.MakeTag(WrapperValueFieldNumber, WireFormat.WireType.LengthDelimited)) } + { typeof(bool), ForBool(WireFormat.MakeTag(Wrappers.WrapperValueFieldNumber, WireFormat.WireType.Varint)) }, + { typeof(int), ForInt32(WireFormat.MakeTag(Wrappers.WrapperValueFieldNumber, WireFormat.WireType.Varint)) }, + { typeof(long), ForInt64(WireFormat.MakeTag(Wrappers.WrapperValueFieldNumber, WireFormat.WireType.Varint)) }, + { typeof(uint), ForUInt32(WireFormat.MakeTag(Wrappers.WrapperValueFieldNumber, WireFormat.WireType.Varint)) }, + { typeof(ulong), ForUInt64(WireFormat.MakeTag(Wrappers.WrapperValueFieldNumber, WireFormat.WireType.Varint)) }, + { typeof(float), ForFloat(WireFormat.MakeTag(Wrappers.WrapperValueFieldNumber, WireFormat.WireType.Fixed32)) }, + { typeof(double), ForDouble(WireFormat.MakeTag(Wrappers.WrapperValueFieldNumber, WireFormat.WireType.Fixed64)) }, + { typeof(string), ForString(WireFormat.MakeTag(Wrappers.WrapperValueFieldNumber, WireFormat.WireType.LengthDelimited)) }, + { typeof(ByteString), ForBytes(WireFormat.MakeTag(Wrappers.WrapperValueFieldNumber, WireFormat.WireType.LengthDelimited)) } }; /// <summary> diff --git a/csharp/src/Google.Protobuf/Google.Protobuf.csproj b/csharp/src/Google.Protobuf/Google.Protobuf.csproj index 8c680d46..00399438 100644 --- a/csharp/src/Google.Protobuf/Google.Protobuf.csproj +++ b/csharp/src/Google.Protobuf/Google.Protobuf.csproj @@ -85,6 +85,9 @@ <Compile Include="FrameworkPortability.cs" />
<Compile Include="IDeepCloneable.cs" />
<Compile Include="JsonFormatter.cs" />
+ <Compile Include="JsonParser.cs" />
+ <Compile Include="JsonToken.cs" />
+ <Compile Include="JsonTokenizer.cs" />
<Compile Include="MessageExtensions.cs" />
<Compile Include="IMessage.cs" />
<Compile Include="InvalidProtocolBufferException.cs" />
@@ -130,7 +133,9 @@ <Compile Include="WellKnownTypes\Timestamp.cs" />
<Compile Include="WellKnownTypes\TimestampPartial.cs" />
<Compile Include="WellKnownTypes\Type.cs" />
+ <Compile Include="WellKnownTypes\ValuePartial.cs" />
<Compile Include="WellKnownTypes\Wrappers.cs" />
+ <Compile Include="WellKnownTypes\WrappersPartial.cs" />
<Compile Include="WireFormat.cs" />
</ItemGroup>
<ItemGroup>
diff --git a/csharp/src/Google.Protobuf/JsonFormatter.cs b/csharp/src/Google.Protobuf/JsonFormatter.cs index 3f9bd478..2070f62b 100644 --- a/csharp/src/Google.Protobuf/JsonFormatter.cs +++ b/csharp/src/Google.Protobuf/JsonFormatter.cs @@ -189,6 +189,7 @@ namespace Google.Protobuf } // Converted from src/google/protobuf/util/internal/utility.cc ToCamelCase + // TODO: Use the new field in FieldDescriptor. internal static string ToCamelCase(string input) { bool capitalizeNext = false; @@ -382,10 +383,19 @@ namespace Google.Protobuf WriteNull(builder); return; } - // For wrapper types, the value will be the (possibly boxed) "native" value, - // so we can write it as if we were unconditionally writing the Value field for the wrapper type. + // For wrapper types, the value will either be the (possibly boxed) "native" value, + // or the message itself if we're formatting it at the top level (e.g. just calling ToString on the object itself). + // If it's the message form, we can extract the value first, which *will* be the (possibly boxed) native value, + // and then proceed, writing it as if we were definitely in a field. (We never need to wrap it in an extra string... + // WriteValue will do the right thing.) + // TODO: Detect this differently when we have dynamic messages. if (descriptor.File == Int32Value.Descriptor.File) { + if (value is IMessage) + { + var message = (IMessage) value; + value = message.Descriptor.Fields[Wrappers.WrapperValueFieldNumber].Accessor.GetValue(message); + } WriteValue(builder, value); return; } @@ -750,7 +760,6 @@ namespace Google.Protobuf private readonly bool formatDefaultValues; - /// <summary> /// Whether fields whose values are the default for the field type (e.g. 0 for integers) /// should be formatted (true) or omitted (false). diff --git a/csharp/src/Google.Protobuf/JsonParser.cs b/csharp/src/Google.Protobuf/JsonParser.cs new file mode 100644 index 00000000..6d2638d9 --- /dev/null +++ b/csharp/src/Google.Protobuf/JsonParser.cs @@ -0,0 +1,813 @@ +#region Copyright notice and license +// Protocol Buffers - Google's data interchange format +// Copyright 2015 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#endregion + +using Google.Protobuf.Reflection; +using Google.Protobuf.WellKnownTypes; +using System; +using System.Collections; +using System.Collections.Generic; +using System.Globalization; +using System.IO; +using System.Linq; +using System.Text; +using System.Text.RegularExpressions; + +namespace Google.Protobuf +{ + /// <summary> + /// Reflection-based converter from JSON to messages. + /// </summary> + /// <remarks> + /// <para> + /// Instances of this class are thread-safe, with no mutable state. + /// </para> + /// <para> + /// This is a simple start to get JSON parsing working. As it's reflection-based, + /// it's not as quick as baking calls into generated messages - but is a simpler implementation. + /// (This code is generally not heavily optimized.) + /// </para> + /// </remarks> + public sealed class JsonParser + { + // Note: using 0-9 instead of \d to ensure no non-ASCII digits. + // This regex isn't a complete validator, but will remove *most* invalid input. We rely on parsing to do the rest. + private static readonly Regex TimestampRegex = new Regex(@"^(?<datetime>[0-9]{4}-[01][0-9]-[0-3][0-9]T[012][0-9]:[0-5][0-9]:[0-5][0-9])(?<subseconds>\.[0-9]{1,9})?(?<offset>(Z|[+-][0-1][0-9]:[0-5][0-9]))$", FrameworkPortability.CompiledRegexWhereAvailable); + private static readonly Regex DurationRegex = new Regex(@"^(?<sign>-)?(?<int>[0-9]{1,12})(?<subseconds>\.[0-9]{1,9})?s$", FrameworkPortability.CompiledRegexWhereAvailable); + private static readonly int[] SubsecondScalingFactors = { 0, 100000000, 100000000, 10000000, 1000000, 100000, 10000, 1000, 100, 10, 1 }; + private static readonly char[] FieldMaskPathSeparators = new[] { ',' }; + + private static readonly JsonParser defaultInstance = new JsonParser(Settings.Default); + + private static readonly Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>> + WellKnownTypeHandlers = new Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>> + { + { Timestamp.Descriptor.FullName, (parser, message, tokenizer) => MergeTimestamp(message, tokenizer.Next()) }, + { Duration.Descriptor.FullName, (parser, message, tokenizer) => MergeDuration(message, tokenizer.Next()) }, + { Value.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeStructValue(message, tokenizer) }, + { ListValue.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeRepeatedField(message, message.Descriptor.Fields[ListValue.ValuesFieldNumber], tokenizer) }, + { Struct.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeStruct(message, tokenizer) }, + { FieldMask.Descriptor.FullName, (parser, message, tokenizer) => MergeFieldMask(message, tokenizer.Next()) }, + { Int32Value.Descriptor.FullName, MergeWrapperField }, + { Int64Value.Descriptor.FullName, MergeWrapperField }, + { UInt32Value.Descriptor.FullName, MergeWrapperField }, + { UInt64Value.Descriptor.FullName, MergeWrapperField }, + { FloatValue.Descriptor.FullName, MergeWrapperField }, + { DoubleValue.Descriptor.FullName, MergeWrapperField }, + { BytesValue.Descriptor.FullName, MergeWrapperField }, + { StringValue.Descriptor.FullName, MergeWrapperField } + }; + + // Convenience method to avoid having to repeat the same code multiple times in the above + // dictionary initialization. + private static void MergeWrapperField(JsonParser parser, IMessage message, JsonTokenizer tokenizer) + { + parser.MergeField(message, message.Descriptor.Fields[Wrappers.WrapperValueFieldNumber], tokenizer); + } + + /// <summary> + /// Returns a formatter using the default settings. /// </summary> + public static JsonParser Default { get { return defaultInstance; } } + +// Currently the settings are unused. +// TODO: When we've implemented Any (and the json spec is finalized), revisit whether they're +// needed at all. +#pragma warning disable 0414 + private readonly Settings settings; +#pragma warning restore 0414 + + /// <summary> + /// Creates a new formatted with the given settings. + /// </summary> + /// <param name="settings">The settings.</param> + public JsonParser(Settings settings) + { + this.settings = settings; + } + + /// <summary> + /// Parses <paramref name="json"/> and merges the information into the given message. + /// </summary> + /// <param name="message">The message to merge the JSON information into.</param> + /// <param name="json">The JSON to parse.</param> + internal void Merge(IMessage message, string json) + { + Merge(message, new StringReader(json)); + } + + /// <summary> + /// Parses JSON read from <paramref name="jsonReader"/> and merges the information into the given message. + /// </summary> + /// <param name="message">The message to merge the JSON information into.</param> + /// <param name="jsonReader">Reader providing the JSON to parse.</param> + internal void Merge(IMessage message, TextReader jsonReader) + { + var tokenizer = new JsonTokenizer(jsonReader); + Merge(message, tokenizer); + var lastToken = tokenizer.Next(); + if (lastToken != JsonToken.EndDocument) + { + throw new InvalidProtocolBufferException("Expected end of JSON after object"); + } + } + + /// <summary> + /// Merges the given message using data from the given tokenizer. In most cases, the next + /// token should be a "start object" token, but wrapper types and nullity can invalidate + /// that assumption. This is implemented as an LL(1) recursive descent parser over the stream + /// of tokens provided by the tokenizer. This token stream is assumed to be valid JSON, with the + /// tokenizer performing that validation - but not every token stream is valid "protobuf JSON". + /// </summary> + private void Merge(IMessage message, JsonTokenizer tokenizer) + { + if (message.Descriptor.IsWellKnownType) + { + Action<JsonParser, IMessage, JsonTokenizer> handler; + if (WellKnownTypeHandlers.TryGetValue(message.Descriptor.FullName, out handler)) + { + handler(this, message, tokenizer); + return; + } + // Well-known types with no special handling continue in the normal way. + } + var token = tokenizer.Next(); + if (token.Type != JsonToken.TokenType.StartObject) + { + throw new InvalidProtocolBufferException("Expected an object"); + } + var descriptor = message.Descriptor; + // TODO: Make this more efficient, e.g. by building it once in the descriptor. + // Additionally, we need to consider whether to parse field names in their original proto form, + // and any overrides in the descriptor. But yes, all of this should be in the descriptor somehow... + // the descriptor can expose the dictionary. + var jsonFieldMap = descriptor.Fields.InDeclarationOrder().ToDictionary(field => JsonFormatter.ToCamelCase(field.Name)); + while (true) + { + token = tokenizer.Next(); + if (token.Type == JsonToken.TokenType.EndObject) + { + return; + } + if (token.Type != JsonToken.TokenType.Name) + { + throw new InvalidOperationException("Unexpected token type " + token.Type); + } + string name = token.StringValue; + FieldDescriptor field; + if (jsonFieldMap.TryGetValue(name, out field)) + { + MergeField(message, field, tokenizer); + } + else + { + // TODO: Is this what we want to do? If not, we'll need to skip the value, + // which may be an object or array. (We might want to put code in the tokenizer + // to do that.) + throw new InvalidProtocolBufferException("Unknown field: " + name); + } + } + } + + private void MergeField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer) + { + var token = tokenizer.Next(); + if (token.Type == JsonToken.TokenType.Null) + { + // Note: different from Java API, which just ignores it. + // TODO: Bring it more in line? Discuss... + field.Accessor.Clear(message); + return; + } + tokenizer.PushBack(token); + + if (field.IsMap) + { + MergeMapField(message, field, tokenizer); + } + else if (field.IsRepeated) + { + MergeRepeatedField(message, field, tokenizer); + } + else + { + var value = ParseSingleValue(field, tokenizer); + field.Accessor.SetValue(message, value); + } + } + + private void MergeRepeatedField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer) + { + var token = tokenizer.Next(); + if (token.Type != JsonToken.TokenType.StartArray) + { + throw new InvalidProtocolBufferException("Repeated field value was not an array. Token type: " + token.Type); + } + + IList list = (IList) field.Accessor.GetValue(message); + while (true) + { + token = tokenizer.Next(); + if (token.Type == JsonToken.TokenType.EndArray) + { + return; + } + tokenizer.PushBack(token); + list.Add(ParseSingleValue(field, tokenizer)); + } + } + + private void MergeMapField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer) + { + // Map fields are always objects, even if the values are well-known types: ParseSingleValue handles those. + var token = tokenizer.Next(); + if (token.Type != JsonToken.TokenType.StartObject) + { + throw new InvalidProtocolBufferException("Expected an object to populate a map"); + } + + var type = field.MessageType; + var keyField = type.FindFieldByNumber(1); + var valueField = type.FindFieldByNumber(2); + if (keyField == null || valueField == null) + { + throw new InvalidProtocolBufferException("Invalid map field: " + field.FullName); + } + IDictionary dictionary = (IDictionary) field.Accessor.GetValue(message); + + while (true) + { + token = tokenizer.Next(); + if (token.Type == JsonToken.TokenType.EndObject) + { + return; + } + object key = ParseMapKey(keyField, token.StringValue); + object value = ParseSingleValue(valueField, tokenizer); + // TODO: Null handling + dictionary[key] = value; + } + } + + private object ParseSingleValue(FieldDescriptor field, JsonTokenizer tokenizer) + { + var token = tokenizer.Next(); + if (token.Type == JsonToken.TokenType.Null) + { + if (field.FieldType == FieldType.Message && field.MessageType.FullName == Value.Descriptor.FullName) + { + return new Value { NullValue = NullValue.NULL_VALUE }; + } + return null; + } + + var fieldType = field.FieldType; + if (fieldType == FieldType.Message) + { + // Parse wrapper types as their constituent types. + // TODO: What does this mean for null? + // TODO: Detect this differently when we have dynamic messages, and put it in one place... + if (field.MessageType.IsWellKnownType && field.MessageType.File == Int32Value.Descriptor.File) + { + field = field.MessageType.Fields[Wrappers.WrapperValueFieldNumber]; + fieldType = field.FieldType; + } + else + { + // TODO: Merge the current value in message? (Public API currently doesn't make this relevant as we don't expose merging.) + tokenizer.PushBack(token); + IMessage subMessage = NewMessageForField(field); + Merge(subMessage, tokenizer); + return subMessage; + } + } + + switch (token.Type) + { + case JsonToken.TokenType.True: + case JsonToken.TokenType.False: + if (fieldType == FieldType.Bool) + { + return token.Type == JsonToken.TokenType.True; + } + // Fall through to "we don't support this type for this case"; could duplicate the behaviour of the default + // case instead, but this way we'd only need to change one place. + goto default; + case JsonToken.TokenType.StringValue: + return ParseSingleStringValue(field, token.StringValue); + // Note: not passing the number value itself here, as we may end up storing the string value in the token too. + case JsonToken.TokenType.Number: + return ParseSingleNumberValue(field, token); + case JsonToken.TokenType.Null: + throw new NotImplementedException("Haven't worked out what to do for null yet"); + default: + throw new InvalidProtocolBufferException("Unsupported JSON token type " + token.Type + " for field type " + fieldType); + } + } + + /// <summary> + /// Parses <paramref name="json"/> into a new message. + /// </summary> + /// <typeparam name="T">The type of message to create.</typeparam> + /// <param name="json">The JSON to parse.</param> + public T Parse<T>(string json) where T : IMessage, new() + { + return Parse<T>(new StringReader(json)); + } + + /// <summary> + /// Parses JSON read from <paramref name="jsonReader"/> into a new message. + /// </summary> + /// <typeparam name="T">The type of message to create.</typeparam> + /// <param name="jsonReader">Reader providing the JSON to parse.</param> + public T Parse<T>(TextReader jsonReader) where T : IMessage, new() + { + T message = new T(); + Merge(message, jsonReader); + return message; + } + + private void MergeStructValue(IMessage message, JsonTokenizer tokenizer) + { + var firstToken = tokenizer.Next(); + var fields = message.Descriptor.Fields; + switch (firstToken.Type) + { + case JsonToken.TokenType.Null: + fields[Value.NullValueFieldNumber].Accessor.SetValue(message, 0); + return; + case JsonToken.TokenType.StringValue: + fields[Value.StringValueFieldNumber].Accessor.SetValue(message, firstToken.StringValue); + return; + case JsonToken.TokenType.Number: + fields[Value.NumberValueFieldNumber].Accessor.SetValue(message, firstToken.NumberValue); + return; + case JsonToken.TokenType.False: + case JsonToken.TokenType.True: + fields[Value.BoolValueFieldNumber].Accessor.SetValue(message, firstToken.Type == JsonToken.TokenType.True); + return; + case JsonToken.TokenType.StartObject: + { + var field = fields[Value.StructValueFieldNumber]; + var structMessage = NewMessageForField(field); + tokenizer.PushBack(firstToken); + Merge(structMessage, tokenizer); + field.Accessor.SetValue(message, structMessage); + return; + } + case JsonToken.TokenType.StartArray: + { + var field = fields[Value.ListValueFieldNumber]; + var list = NewMessageForField(field); + tokenizer.PushBack(firstToken); + Merge(list, tokenizer); + field.Accessor.SetValue(message, list); + return; + } + default: + throw new InvalidOperationException("Unexpected token type: " + firstToken.Type); + } + } + + private void MergeStruct(IMessage message, JsonTokenizer tokenizer) + { + var token = tokenizer.Next(); + if (token.Type != JsonToken.TokenType.StartObject) + { + throw new InvalidProtocolBufferException("Expected object value for Struct"); + } + tokenizer.PushBack(token); + + var field = message.Descriptor.Fields[Struct.FieldsFieldNumber]; + MergeMapField(message, field, tokenizer); + } + + #region Utility methods which don't depend on the state (or settings) of the parser. + private static object ParseMapKey(FieldDescriptor field, string keyText) + { + switch (field.FieldType) + { + case FieldType.Bool: + if (keyText == "true") + { + return true; + } + if (keyText == "false") + { + return false; + } + throw new InvalidProtocolBufferException("Invalid string for bool map key: " + keyText); + case FieldType.String: + return keyText; + case FieldType.Int32: + case FieldType.SInt32: + case FieldType.SFixed32: + return ParseNumericString(keyText, int.Parse, false); + case FieldType.UInt32: + case FieldType.Fixed32: + return ParseNumericString(keyText, uint.Parse, false); + case FieldType.Int64: + case FieldType.SInt64: + case FieldType.SFixed64: + return ParseNumericString(keyText, long.Parse, false); + case FieldType.UInt64: + case FieldType.Fixed64: + return ParseNumericString(keyText, ulong.Parse, false); + default: + throw new InvalidProtocolBufferException("Invalid field type for map: " + field.FieldType); + } + } + + private static object ParseSingleNumberValue(FieldDescriptor field, JsonToken token) + { + double value = token.NumberValue; + checked + { + // TODO: Validate that it's actually an integer, possibly in terms of the textual representation? + try + { + switch (field.FieldType) + { + case FieldType.Int32: + case FieldType.SInt32: + case FieldType.SFixed32: + return (int) value; + case FieldType.UInt32: + case FieldType.Fixed32: + return (uint) value; + case FieldType.Int64: + case FieldType.SInt64: + case FieldType.SFixed64: + return (long) value; + case FieldType.UInt64: + case FieldType.Fixed64: + return (ulong) value; + case FieldType.Double: + return value; + case FieldType.Float: + if (double.IsNaN(value)) + { + return float.NaN; + } + if (value > float.MaxValue || value < float.MinValue) + { + if (double.IsPositiveInfinity(value)) + { + return float.PositiveInfinity; + } + if (double.IsNegativeInfinity(value)) + { + return float.NegativeInfinity; + } + throw new InvalidProtocolBufferException("Value out of range: " + value); + } + return (float) value; + default: + throw new InvalidProtocolBufferException("Unsupported conversion from JSON number for field type " + field.FieldType); + } + } + catch (OverflowException) + { + throw new InvalidProtocolBufferException("Value out of range: " + value); + } + } + } + + private static object ParseSingleStringValue(FieldDescriptor field, string text) + { + switch (field.FieldType) + { + case FieldType.String: + return text; + case FieldType.Bytes: + return ByteString.FromBase64(text); + case FieldType.Int32: + case FieldType.SInt32: + case FieldType.SFixed32: + return ParseNumericString(text, int.Parse, false); + case FieldType.UInt32: + case FieldType.Fixed32: + return ParseNumericString(text, uint.Parse, false); + case FieldType.Int64: + case FieldType.SInt64: + case FieldType.SFixed64: + return ParseNumericString(text, long.Parse, false); + case FieldType.UInt64: + case FieldType.Fixed64: + return ParseNumericString(text, ulong.Parse, false); + case FieldType.Double: + double d = ParseNumericString(text, double.Parse, true); + // double.Parse can return +/- infinity on Mono for non-infinite values which are out of range for double. + if (double.IsInfinity(d) && !text.Contains("Infinity")) + { + throw new InvalidProtocolBufferException("Invalid numeric value: " + text); + } + return d; + case FieldType.Float: + float f = ParseNumericString(text, float.Parse, true); + // float.Parse can return +/- infinity on Mono for non-infinite values which are out of range for float. + if (float.IsInfinity(f) && !text.Contains("Infinity")) + { + throw new InvalidProtocolBufferException("Invalid numeric value: " + text); + } + return f; + case FieldType.Enum: + var enumValue = field.EnumType.FindValueByName(text); + if (enumValue == null) + { + throw new InvalidProtocolBufferException("Invalid enum value: " + text + " for enum type: " + field.EnumType.FullName); + } + // Just return it as an int, and let the CLR convert it. + return enumValue.Number; + default: + throw new InvalidProtocolBufferException("Unsupported conversion from JSON string for field type " + field.FieldType); + } + } + + /// <summary> + /// Creates a new instance of the message type for the given field. + /// This method is mostly extracted so we can replace it in one go when we work out + /// what we want to do instead of Activator.CreateInstance. + /// </summary> + private static IMessage NewMessageForField(FieldDescriptor field) + { + // TODO: Create an instance in a better way ? + // (We could potentially add a Parser property to MessageDescriptor... see issue 806.) + return (IMessage) Activator.CreateInstance(field.MessageType.GeneratedType); + } + + private static T ParseNumericString<T>(string text, Func<string, NumberStyles, IFormatProvider, T> parser, bool floatingPoint) + { + // TODO: Prohibit leading zeroes (but allow 0!) + // TODO: Validate handling of "Infinity" etc. (Should be case sensitive, no leading whitespace etc) + // Can't prohibit this with NumberStyles. + if (text.StartsWith("+")) + { + throw new InvalidProtocolBufferException("Invalid numeric value: " + text); + } + if (text.StartsWith("0") && text.Length > 1) + { + if (text[1] >= '0' && text[1] <= '9') + { + throw new InvalidProtocolBufferException("Invalid numeric value: " + text); + } + } + else if (text.StartsWith("-0") && text.Length > 2) + { + if (text[2] >= '0' && text[2] <= '9') + { + throw new InvalidProtocolBufferException("Invalid numeric value: " + text); + } + } + try + { + var styles = floatingPoint + ? NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent + : NumberStyles.AllowLeadingSign; + return parser(text, styles, CultureInfo.InvariantCulture); + } + catch (FormatException) + { + throw new InvalidProtocolBufferException("Invalid numeric value for type: " + text); + } + catch (OverflowException) + { + throw new InvalidProtocolBufferException("Value out of range: " + text); + } + } + + private static void MergeTimestamp(IMessage message, JsonToken token) + { + if (token.Type != JsonToken.TokenType.StringValue) + { + throw new InvalidProtocolBufferException("Expected string value for Timestamp"); + } + var match = TimestampRegex.Match(token.StringValue); + if (!match.Success) + { + throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue); + } + var dateTime = match.Groups["datetime"].Value; + var subseconds = match.Groups["subseconds"].Value; + var offset = match.Groups["offset"].Value; + + try + { + DateTime parsed = DateTime.ParseExact( + dateTime, + "yyyy-MM-dd'T'HH:mm:ss", + CultureInfo.InvariantCulture, + DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal); + // TODO: It would be nice not to have to create all these objects... easy to optimize later though. + Timestamp timestamp = Timestamp.FromDateTime(parsed); + int nanosToAdd = 0; + if (subseconds != "") + { + // This should always work, as we've got 1-9 digits. + int parsedFraction = int.Parse(subseconds.Substring(1), CultureInfo.InvariantCulture); + nanosToAdd = parsedFraction * SubsecondScalingFactors[subseconds.Length]; + } + int secondsToAdd = 0; + if (offset != "Z") + { + // This is the amount we need to *subtract* from the local time to get to UTC - hence - => +1 and vice versa. + int sign = offset[0] == '-' ? 1 : -1; + int hours = int.Parse(offset.Substring(1, 2), CultureInfo.InvariantCulture); + int minutes = int.Parse(offset.Substring(4, 2)); + int totalMinutes = hours * 60 + minutes; + if (totalMinutes > 18 * 60) + { + throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue); + } + if (totalMinutes == 0 && sign == 1) + { + // This is an offset of -00:00, which means "unknown local offset". It makes no sense for a timestamp. + throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue); + } + // We need to *subtract* the offset from local time to get UTC. + secondsToAdd = sign * totalMinutes * 60; + } + // Ensure we've got the right signs. Currently unnecessary, but easy to do. + if (secondsToAdd < 0 && nanosToAdd > 0) + { + secondsToAdd++; + nanosToAdd = nanosToAdd - Duration.NanosecondsPerSecond; + } + if (secondsToAdd != 0 || nanosToAdd != 0) + { + timestamp += new Duration { Nanos = nanosToAdd, Seconds = secondsToAdd }; + // The resulting timestamp after offset change would be out of our expected range. Currently the Timestamp message doesn't validate this + // anywhere, but we shouldn't parse it. + if (timestamp.Seconds < Timestamp.UnixSecondsAtBclMinValue || timestamp.Seconds > Timestamp.UnixSecondsAtBclMaxValue) + { + throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue); + } + } + message.Descriptor.Fields[Timestamp.SecondsFieldNumber].Accessor.SetValue(message, timestamp.Seconds); + message.Descriptor.Fields[Timestamp.NanosFieldNumber].Accessor.SetValue(message, timestamp.Nanos); + } + catch (FormatException) + { + throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue); + } + } + + private static void MergeDuration(IMessage message, JsonToken token) + { + if (token.Type != JsonToken.TokenType.StringValue) + { + throw new InvalidProtocolBufferException("Expected string value for Duration"); + } + var match = DurationRegex.Match(token.StringValue); + if (!match.Success) + { + throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue); + } + var sign = match.Groups["sign"].Value; + var secondsText = match.Groups["int"].Value; + // Prohibit leading insignficant zeroes + if (secondsText[0] == '0' && secondsText.Length > 1) + { + throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue); + } + var subseconds = match.Groups["subseconds"].Value; + var multiplier = sign == "-" ? -1 : 1; + + try + { + long seconds = long.Parse(secondsText, CultureInfo.InvariantCulture); + int nanos = 0; + if (subseconds != "") + { + // This should always work, as we've got 1-9 digits. + int parsedFraction = int.Parse(subseconds.Substring(1)); + nanos = parsedFraction * SubsecondScalingFactors[subseconds.Length]; + } + if (seconds >= Duration.MaxSeconds) + { + // Allow precisely 315576000000 seconds, but prohibit even 1ns more. + if (seconds > Duration.MaxSeconds || nanos > 0) + { + throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue); + } + } + message.Descriptor.Fields[Duration.SecondsFieldNumber].Accessor.SetValue(message, seconds * multiplier); + message.Descriptor.Fields[Duration.NanosFieldNumber].Accessor.SetValue(message, nanos * multiplier); + } + catch (FormatException) + { + throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue); + } + } + + private static void MergeFieldMask(IMessage message, JsonToken token) + { + if (token.Type != JsonToken.TokenType.StringValue) + { + throw new InvalidProtocolBufferException("Expected string value for FieldMask"); + } + // TODO: Do we *want* to remove empty entries? Probably okay to treat "" as "no paths", but "foo,,bar"? + string[] jsonPaths = token.StringValue.Split(FieldMaskPathSeparators, StringSplitOptions.RemoveEmptyEntries); + IList messagePaths = (IList) message.Descriptor.Fields[FieldMask.PathsFieldNumber].Accessor.GetValue(message); + foreach (var path in jsonPaths) + { + messagePaths.Add(ToSnakeCase(path)); + } + } + + // Ported from src/google/protobuf/util/internal/utility.cc + private static string ToSnakeCase(string text) + { + var builder = new StringBuilder(text.Length * 2); + bool wasNotUnderscore = false; // Initialize to false for case 1 (below) + bool wasNotCap = false; + + for (int i = 0; i < text.Length; i++) + { + char c = text[i]; + if (c >= 'A' && c <= 'Z') // ascii_isupper + { + // Consider when the current character B is capitalized: + // 1) At beginning of input: "B..." => "b..." + // (e.g. "Biscuit" => "biscuit") + // 2) Following a lowercase: "...aB..." => "...a_b..." + // (e.g. "gBike" => "g_bike") + // 3) At the end of input: "...AB" => "...ab" + // (e.g. "GoogleLAB" => "google_lab") + // 4) Followed by a lowercase: "...ABc..." => "...a_bc..." + // (e.g. "GBike" => "g_bike") + if (wasNotUnderscore && // case 1 out + (wasNotCap || // case 2 in, case 3 out + (i + 1 < text.Length && // case 3 out + (text[i + 1] >= 'a' && text[i + 1] <= 'z')))) // ascii_islower(text[i + 1]) + { // case 4 in + // We add an underscore for case 2 and case 4. + builder.Append('_'); + } + // ascii_tolower, but we already know that c *is* an upper case ASCII character... + builder.Append((char) (c + 'a' - 'A')); + wasNotUnderscore = true; + wasNotCap = false; + } + else + { + builder.Append(c); + wasNotUnderscore = c != '_'; + wasNotCap = true; + } + } + return builder.ToString(); + } + #endregion + + /// <summary> + /// Settings controlling JSON parsing. (Currently doesn't have any actual settings, but I suspect + /// we'll want them for levels of strictness, descriptor pools for Any handling, etc.) + /// </summary> + public sealed class Settings + { + private static readonly Settings defaultInstance = new Settings(); + + // TODO: Add recursion limit. + + /// <summary> + /// Default settings, as used by <see cref="JsonParser.Default"/> + /// </summary> + public static Settings Default { get { return defaultInstance; } } + + /// <summary> + /// Creates a new <see cref="Settings"/> object. + /// </summary> + public Settings() + { + } + } + } +} diff --git a/csharp/src/Google.Protobuf/JsonToken.cs b/csharp/src/Google.Protobuf/JsonToken.cs new file mode 100644 index 00000000..6c0138cc --- /dev/null +++ b/csharp/src/Google.Protobuf/JsonToken.cs @@ -0,0 +1,166 @@ +#region Copyright notice and license +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#endregion + +using System; + +namespace Google.Protobuf +{ + internal sealed class JsonToken : IEquatable<JsonToken> + { + // Tokens with no value can be reused. + private static readonly JsonToken _true = new JsonToken(TokenType.True); + private static readonly JsonToken _false = new JsonToken(TokenType.False); + private static readonly JsonToken _null = new JsonToken(TokenType.Null); + private static readonly JsonToken startObject = new JsonToken(TokenType.StartObject); + private static readonly JsonToken endObject = new JsonToken(TokenType.EndObject); + private static readonly JsonToken startArray = new JsonToken(TokenType.StartArray); + private static readonly JsonToken endArray = new JsonToken(TokenType.EndArray); + private static readonly JsonToken endDocument = new JsonToken(TokenType.EndDocument); + + internal static JsonToken Null { get { return _null; } } + internal static JsonToken False { get { return _false; } } + internal static JsonToken True { get { return _true; } } + internal static JsonToken StartObject{ get { return startObject; } } + internal static JsonToken EndObject { get { return endObject; } } + internal static JsonToken StartArray { get { return startArray; } } + internal static JsonToken EndArray { get { return endArray; } } + internal static JsonToken EndDocument { get { return endDocument; } } + + internal static JsonToken Name(string name) + { + return new JsonToken(TokenType.Name, stringValue: name); + } + + internal static JsonToken Value(string value) + { + return new JsonToken(TokenType.StringValue, stringValue: value); + } + + internal static JsonToken Value(double value) + { + return new JsonToken(TokenType.Number, numberValue: value); + } + + internal enum TokenType + { + Null, + False, + True, + StringValue, + Number, + Name, + StartObject, + EndObject, + StartArray, + EndArray, + EndDocument + } + + // A value is a string, number, array, object, null, true or false + // Arrays and objects have start/end + // A document consists of a value + // Objects are name/value sequences. + + private readonly TokenType type; + private readonly string stringValue; + private readonly double numberValue; + + internal TokenType Type { get { return type; } } + internal string StringValue { get { return stringValue; } } + internal double NumberValue { get { return numberValue; } } + + private JsonToken(TokenType type, string stringValue = null, double numberValue = 0) + { + this.type = type; + this.stringValue = stringValue; + this.numberValue = numberValue; + } + + public override bool Equals(object obj) + { + return Equals(obj as JsonToken); + } + + public override int GetHashCode() + { + unchecked + { + int hash = 17; + hash = hash * 31 + (int) type; + hash = hash * 31 + stringValue == null ? 0 : stringValue.GetHashCode(); + hash = hash * 31 + numberValue.GetHashCode(); + return hash; + } + } + + public override string ToString() + { + switch (type) + { + case TokenType.Null: + return "null"; + case TokenType.True: + return "true"; + case TokenType.False: + return "false"; + case TokenType.Name: + return "name (" + stringValue + ")"; + case TokenType.StringValue: + return "value (" + stringValue + ")"; + case TokenType.Number: + return "number (" + numberValue + ")"; + case TokenType.StartObject: + return "start-object"; + case TokenType.EndObject: + return "end-object"; + case TokenType.StartArray: + return "start-array"; + case TokenType.EndArray: + return "end-array"; + case TokenType.EndDocument: + return "end-document"; + default: + throw new InvalidOperationException("Token is of unknown type " + type); + } + } + + public bool Equals(JsonToken other) + { + if (ReferenceEquals(other, null)) + { + return false; + } + // Note use of other.numberValue.Equals rather than ==, so that NaN compares appropriately. + return other.type == type && other.stringValue == stringValue && other.numberValue.Equals(numberValue); + } + } +} diff --git a/csharp/src/Google.Protobuf/JsonTokenizer.cs b/csharp/src/Google.Protobuf/JsonTokenizer.cs new file mode 100644 index 00000000..108ccebe --- /dev/null +++ b/csharp/src/Google.Protobuf/JsonTokenizer.cs @@ -0,0 +1,633 @@ +#region Copyright notice and license +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#endregion +using System; +using System.Collections.Generic; +using System.Globalization; +using System.IO; +using System.Text; + +namespace Google.Protobuf +{ + /// <summary> + /// Simple but strict JSON tokenizer, rigidly following RFC 7159. + /// </summary> + /// <remarks> + /// <para> + /// This tokenizer is stateful, and only returns "useful" tokens - names, values etc. + /// It does not create tokens for the separator between names and values, or for the comma + /// between values. It validates the token stream as it goes - so callers can assume that the + /// tokens it produces are appropriate. For example, it would never produce "start object, end array." + /// </para> + /// <para>Not thread-safe.</para> + /// </remarks> + internal sealed class JsonTokenizer + { + // The set of states in which a value is valid next token. + private static readonly State ValueStates = State.ArrayStart | State.ArrayAfterComma | State.ObjectAfterColon | State.StartOfDocument; + + private readonly Stack<ContainerType> containerStack = new Stack<ContainerType>(); + private readonly PushBackReader reader; + private JsonToken bufferedToken; + private State state; + + internal JsonTokenizer(TextReader reader) + { + this.reader = new PushBackReader(reader); + state = State.StartOfDocument; + containerStack.Push(ContainerType.Document); + } + + internal void PushBack(JsonToken token) + { + if (bufferedToken != null) + { + throw new InvalidOperationException("Can't push back twice"); + } + bufferedToken = token; + } + + /// <summary> + /// Returns the next JSON token in the stream. An EndDocument token is returned to indicate the end of the stream, + /// after which point <c>Next()</c> should not be called again. + /// </summary> + /// <remarks> + /// This method essentially just loops through characters skipping whitespace, validating and + /// changing state (e.g. from ObjectBeforeColon to ObjectAfterColon) + /// until it reaches something which will be a genuine token (e.g. a start object, or a value) at which point + /// it returns the token. Although the method is large, it would be relatively hard to break down further... most + /// of it is the large switch statement, which sometimes returns and sometimes doesn't. + /// </remarks> + /// <returns>The next token in the stream. This is never null.</returns> + /// <exception cref="InvalidOperationException">This method is called after an EndDocument token has been returned</exception> + internal JsonToken Next() + { + if (bufferedToken != null) + { + var ret = bufferedToken; + bufferedToken = null; + return ret; + } + if (state == State.ReaderExhausted) + { + throw new InvalidOperationException("Next() called after end of document"); + } + while (true) + { + var next = reader.Read(); + if (next == null) + { + ValidateState(State.ExpectedEndOfDocument, "Unexpected end of document in state: "); + state = State.ReaderExhausted; + return JsonToken.EndDocument; + } + switch (next.Value) + { + // Skip whitespace between tokens + case ' ': + case '\t': + case '\r': + case '\n': + break; + case ':': + ValidateState(State.ObjectBeforeColon, "Invalid state to read a colon: "); + state = State.ObjectAfterColon; + break; + case ',': + ValidateState(State.ObjectAfterProperty | State.ArrayAfterValue, "Invalid state to read a colon: "); + state = state == State.ObjectAfterProperty ? State.ObjectAfterComma : State.ArrayAfterComma; + break; + case '"': + string stringValue = ReadString(); + if ((state & (State.ObjectStart | State.ObjectAfterComma)) != 0) + { + state = State.ObjectBeforeColon; + return JsonToken.Name(stringValue); + } + else + { + ValidateAndModifyStateForValue("Invalid state to read a double quote: "); + return JsonToken.Value(stringValue); + } + case '{': + ValidateState(ValueStates, "Invalid state to read an open brace: "); + state = State.ObjectStart; + containerStack.Push(ContainerType.Object); + return JsonToken.StartObject; + case '}': + ValidateState(State.ObjectAfterProperty | State.ObjectStart, "Invalid state to read a close brace: "); + PopContainer(); + return JsonToken.EndObject; + case '[': + ValidateState(ValueStates, "Invalid state to read an open square bracket: "); + state = State.ArrayStart; + containerStack.Push(ContainerType.Array); + return JsonToken.StartArray; + case ']': + ValidateState(State.ArrayAfterValue | State.ArrayStart, "Invalid state to read a close square bracket: "); + PopContainer(); + return JsonToken.EndArray; + case 'n': // Start of null + ConsumeLiteral("null"); + ValidateAndModifyStateForValue("Invalid state to read a null literal: "); + return JsonToken.Null; + case 't': // Start of true + ConsumeLiteral("true"); + ValidateAndModifyStateForValue("Invalid state to read a true literal: "); + return JsonToken.True; + case 'f': // Start of false + ConsumeLiteral("false"); + ValidateAndModifyStateForValue("Invalid state to read a false literal: "); + return JsonToken.False; + case '-': // Start of a number + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + double number = ReadNumber(next.Value); + ValidateAndModifyStateForValue("Invalid state to read a number token: "); + return JsonToken.Value(number); + default: + throw new InvalidProtocolBufferException("Invalid first character of token: " + next.Value); + } + } + } + + private void ValidateState(State validStates, string errorPrefix) + { + if ((validStates & state) == 0) + { + throw new InvalidProtocolBufferException(errorPrefix + state); + } + } + + /// <summary> + /// Reads a string token. It is assumed that the opening " has already been read. + /// </summary> + private string ReadString() + { + var value = new StringBuilder(); + bool haveHighSurrogate = false; + while (true) + { + char c = reader.ReadOrFail("Unexpected end of text while reading string"); + if (c < ' ') + { + throw new InvalidProtocolBufferException(string.Format(CultureInfo.InvariantCulture, "Invalid character in string literal: U+{0:x4}", (int) c)); + } + if (c == '"') + { + if (haveHighSurrogate) + { + throw new InvalidProtocolBufferException("Invalid use of surrogate pair code units"); + } + return value.ToString(); + } + if (c == '\\') + { + c = ReadEscapedCharacter(); + } + // TODO: Consider only allowing surrogate pairs that are either both escaped, + // or both not escaped. It would be a very odd text stream that contained a "lone" high surrogate + // followed by an escaped low surrogate or vice versa... and that couldn't even be represented in UTF-8. + if (haveHighSurrogate != char.IsLowSurrogate(c)) + { + throw new InvalidProtocolBufferException("Invalid use of surrogate pair code units"); + } + haveHighSurrogate = char.IsHighSurrogate(c); + value.Append(c); + } + } + + /// <summary> + /// Reads an escaped character. It is assumed that the leading backslash has already been read. + /// </summary> + private char ReadEscapedCharacter() + { + char c = reader.ReadOrFail("Unexpected end of text while reading character escape sequence"); + switch (c) + { + case 'n': + return '\n'; + case '\\': + return '\\'; + case 'b': + return '\b'; + case 'f': + return '\f'; + case 'r': + return '\r'; + case 't': + return '\t'; + case '"': + return '"'; + case '/': + return '/'; + case 'u': + return ReadUnicodeEscape(); + default: + throw new InvalidProtocolBufferException(string.Format(CultureInfo.InvariantCulture, "Invalid character in character escape sequence: U+{0:x4}", (int) c)); + } + } + + /// <summary> + /// Reads an escaped Unicode 4-nybble hex sequence. It is assumed that the leading \u has already been read. + /// </summary> + private char ReadUnicodeEscape() + { + int result = 0; + for (int i = 0; i < 4; i++) + { + char c = reader.ReadOrFail("Unexpected end of text while reading Unicode escape sequence"); + int nybble; + if (c >= '0' && c <= '9') + { + nybble = c - '0'; + } + else if (c >= 'a' && c <= 'f') + { + nybble = c - 'a' + 10; + } + else if (c >= 'A' && c <= 'F') + { + nybble = c - 'A' + 10; + } + else + { + throw new InvalidProtocolBufferException(string.Format(CultureInfo.InvariantCulture, "Invalid character in character escape sequence: U+{0:x4}", (int) c)); + } + result = (result << 4) + nybble; + } + return (char) result; + } + + /// <summary> + /// Consumes a text-only literal, throwing an exception if the read text doesn't match it. + /// It is assumed that the first letter of the literal has already been read. + /// </summary> + private void ConsumeLiteral(string text) + { + for (int i = 1; i < text.Length; i++) + { + char? next = reader.Read(); + if (next == null) + { + throw new InvalidProtocolBufferException("Unexpected end of text while reading literal token " + text); + } + if (next.Value != text[i]) + { + throw new InvalidProtocolBufferException("Unexpected character while reading literal token " + text); + } + } + } + + private double ReadNumber(char initialCharacter) + { + StringBuilder builder = new StringBuilder(); + if (initialCharacter == '-') + { + builder.Append("-"); + } + else + { + reader.PushBack(initialCharacter); + } + // Each method returns the character it read that doesn't belong in that part, + // so we know what to do next, including pushing the character back at the end. + // null is returned for "end of text". + char? next = ReadInt(builder); + if (next == '.') + { + next = ReadFrac(builder); + } + if (next == 'e' || next == 'E') + { + next = ReadExp(builder); + } + // If we read a character which wasn't part of the number, push it back so we can read it again + // to parse the next token. + if (next != null) + { + reader.PushBack(next.Value); + } + + // TODO: What exception should we throw if the value can't be represented as a double? + try + { + return double.Parse(builder.ToString(), + NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent, + CultureInfo.InvariantCulture); + } + catch (OverflowException) + { + throw new InvalidProtocolBufferException("Numeric value out of range: " + builder); + } + } + + private char? ReadInt(StringBuilder builder) + { + char first = reader.ReadOrFail("Invalid numeric literal"); + if (first < '0' || first > '9') + { + throw new InvalidProtocolBufferException("Invalid numeric literal"); + } + builder.Append(first); + int digitCount; + char? next = ConsumeDigits(builder, out digitCount); + if (first == '0' && digitCount != 0) + { + throw new InvalidProtocolBufferException("Invalid numeric literal: leading 0 for non-zero value."); + } + return next; + } + + private char? ReadFrac(StringBuilder builder) + { + builder.Append('.'); // Already consumed this + int digitCount; + char? next = ConsumeDigits(builder, out digitCount); + if (digitCount == 0) + { + throw new InvalidProtocolBufferException("Invalid numeric literal: fraction with no trailing digits"); + } + return next; + } + + private char? ReadExp(StringBuilder builder) + { + builder.Append('E'); // Already consumed this (or 'e') + char? next = reader.Read(); + if (next == null) + { + throw new InvalidProtocolBufferException("Invalid numeric literal: exponent with no trailing digits"); + } + if (next == '-' || next == '+') + { + builder.Append(next.Value); + } + else + { + reader.PushBack(next.Value); + } + int digitCount; + next = ConsumeDigits(builder, out digitCount); + if (digitCount == 0) + { + throw new InvalidProtocolBufferException("Invalid numeric literal: exponent without value"); + } + return next; + } + + private char? ConsumeDigits(StringBuilder builder, out int count) + { + count = 0; + while (true) + { + char? next = reader.Read(); + if (next == null || next.Value < '0' || next.Value > '9') + { + return next; + } + count++; + builder.Append(next.Value); + } + } + + /// <summary> + /// Validates that we're in a valid state to read a value (using the given error prefix if necessary) + /// and changes the state to the appropriate one, e.g. ObjectAfterColon to ObjectAfterProperty. + /// </summary> + private void ValidateAndModifyStateForValue(string errorPrefix) + { + ValidateState(ValueStates, errorPrefix); + switch (state) + { + case State.StartOfDocument: + state = State.ExpectedEndOfDocument; + return; + case State.ObjectAfterColon: + state = State.ObjectAfterProperty; + return; + case State.ArrayStart: + case State.ArrayAfterComma: + state = State.ArrayAfterValue; + return; + default: + throw new InvalidOperationException("ValidateAndModifyStateForValue does not handle all value states (and should)"); + } + } + + /// <summary> + /// Pops the top-most container, and sets the state to the appropriate one for the end of a value + /// in the parent container. + /// </summary> + private void PopContainer() + { + containerStack.Pop(); + var parent = containerStack.Peek(); + switch (parent) + { + case ContainerType.Object: + state = State.ObjectAfterProperty; + break; + case ContainerType.Array: + state = State.ArrayAfterValue; + break; + case ContainerType.Document: + state = State.ExpectedEndOfDocument; + break; + default: + throw new InvalidOperationException("Unexpected container type: " + parent); + } + } + + private enum ContainerType + { + Document, Object, Array + } + + /// <summary> + /// Possible states of the tokenizer. + /// </summary> + /// <remarks> + /// <para>This is a flags enum purely so we can simply and efficiently represent a set of valid states + /// for checking.</para> + /// <para> + /// Each is documented with an example, + /// where ^ represents the current position within the text stream. The examples all use string values, + /// but could be any value, including nested objects/arrays. + /// The complete state of the tokenizer also includes a stack to indicate the contexts (arrays/objects). + /// Any additional notional state of "AfterValue" indicates that a value has been completed, at which + /// point there's an immediate transition to ExpectedEndOfDocument, ObjectAfterProperty or ArrayAfterValue. + /// </para> + /// <para> + /// These states were derived manually by reading RFC 7159 carefully. + /// </para> + /// </remarks> + [Flags] + private enum State + { + /// <summary> + /// ^ { "foo": "bar" } + /// Before the value in a document. Next states: ObjectStart, ArrayStart, "AfterValue" + /// </summary> + StartOfDocument = 1 << 0, + /// <summary> + /// { "foo": "bar" } ^ + /// After the value in a document. Next states: ReaderExhausted + /// </summary> + ExpectedEndOfDocument = 1 << 1, + /// <summary> + /// { "foo": "bar" } ^ (and already read to the end of the reader) + /// Terminal state. + /// </summary> + ReaderExhausted = 1 << 2, + /// <summary> + /// { ^ "foo": "bar" } + /// Before the *first* property in an object. + /// Next states: + /// "AfterValue" (empty object) + /// ObjectBeforeColon (read a name) + /// </summary> + ObjectStart = 1 << 3, + /// <summary> + /// { "foo" ^ : "bar", "x": "y" } + /// Next state: ObjectAfterColon + /// </summary> + ObjectBeforeColon = 1 << 4, + /// <summary> + /// { "foo" : ^ "bar", "x": "y" } + /// Before any property other than the first in an object. + /// (Equivalently: after any property in an object) + /// Next states: + /// "AfterValue" (value is simple) + /// ObjectStart (value is object) + /// ArrayStart (value is array) + /// </summary> + ObjectAfterColon = 1 << 5, + /// <summary> + /// { "foo" : "bar" ^ , "x" : "y" } + /// At the end of a property, so expecting either a comma or end-of-object + /// Next states: ObjectAfterComma or "AfterValue" + /// </summary> + ObjectAfterProperty = 1 << 6, + /// <summary> + /// { "foo":"bar", ^ "x":"y" } + /// Read the comma after the previous property, so expecting another property. + /// This is like ObjectStart, but closing brace isn't valid here + /// Next state: ObjectBeforeColon. + /// </summary> + ObjectAfterComma = 1 << 7, + /// <summary> + /// [ ^ "foo", "bar" ] + /// Before the *first* value in an array. + /// Next states: + /// "AfterValue" (read a value) + /// "AfterValue" (end of array; will pop stack) + /// </summary> + ArrayStart = 1 << 8, + /// <summary> + /// [ "foo" ^ , "bar" ] + /// After any value in an array, so expecting either a comma or end-of-array + /// Next states: ArrayAfterComma or "AfterValue" + /// </summary> + ArrayAfterValue = 1 << 9, + /// <summary> + /// [ "foo", ^ "bar" ] + /// After a comma in an array, so there *must* be another value (simple or complex). + /// Next states: "AfterValue" (simple value), StartObject, StartArray + /// </summary> + ArrayAfterComma = 1 << 10 + } + + /// <summary> + /// Wrapper around a text reader allowing small amounts of buffering and location handling. + /// </summary> + private class PushBackReader + { + // TODO: Add locations for errors etc. + + private readonly TextReader reader; + + internal PushBackReader(TextReader reader) + { + // TODO: Wrap the reader in a BufferedReader? + this.reader = reader; + } + + /// <summary> + /// The buffered next character, if we have one. + /// </summary> + private char? nextChar; + + /// <summary> + /// Returns the next character in the stream, or null if we have reached the end. + /// </summary> + /// <returns></returns> + internal char? Read() + { + if (nextChar != null) + { + char? tmp = nextChar; + nextChar = null; + return tmp; + } + int next = reader.Read(); + return next == -1 ? null : (char?) next; + } + + internal char ReadOrFail(string messageOnFailure) + { + char? next = Read(); + if (next == null) + { + throw new InvalidProtocolBufferException(messageOnFailure); + } + return next.Value; + } + + internal void PushBack(char c) + { + if (nextChar != null) + { + throw new InvalidOperationException("Cannot push back when already buffering a character"); + } + nextChar = c; + } + } + } +} diff --git a/csharp/src/Google.Protobuf/MessageParser.cs b/csharp/src/Google.Protobuf/MessageParser.cs index 6a6f1017..70c52ba6 100644 --- a/csharp/src/Google.Protobuf/MessageParser.cs +++ b/csharp/src/Google.Protobuf/MessageParser.cs @@ -142,5 +142,17 @@ namespace Google.Protobuf message.MergeFrom(input); return message; } + + /// <summary> + /// Parses a message from the given JSON. + /// </summary> + /// <param name="json">The JSON to parse.</param> + /// <returns>The parsed message.</returns> + public T ParseJson(string json) + { + T message = factory(); + JsonParser.Default.Merge(message, json); + return message; + } } } diff --git a/csharp/src/Google.Protobuf/Properties/AssemblyInfo.cs b/csharp/src/Google.Protobuf/Properties/AssemblyInfo.cs index 9a484ad6..225ac0dd 100644 --- a/csharp/src/Google.Protobuf/Properties/AssemblyInfo.cs +++ b/csharp/src/Google.Protobuf/Properties/AssemblyInfo.cs @@ -46,7 +46,10 @@ using System.Security; [assembly: AssemblyCopyright("Copyright © 2015")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]
+
+#if !NCRUNCH
[assembly: AllowPartiallyTrustedCallers]
+#endif
#if SIGNED
[assembly: InternalsVisibleTo("Google.Protobuf.Test, PublicKey=" +
diff --git a/csharp/src/Google.Protobuf/WellKnownTypes/DurationPartial.cs b/csharp/src/Google.Protobuf/WellKnownTypes/DurationPartial.cs index 18ebefd2..324f48fc 100644 --- a/csharp/src/Google.Protobuf/WellKnownTypes/DurationPartial.cs +++ b/csharp/src/Google.Protobuf/WellKnownTypes/DurationPartial.cs @@ -48,6 +48,16 @@ namespace Google.Protobuf.WellKnownTypes public const int NanosecondsPerTick = 100; /// <summary> + /// The maximum permitted number of seconds. + /// </summary> + public const long MaxSeconds = 315576000000L; + + /// <summary> + /// The minimum permitted number of seconds. + /// </summary> + public const long MinSeconds = -315576000000L; + + /// <summary> /// Converts this <see cref="Duration"/> to a <see cref="TimeSpan"/>. /// </summary> /// <remarks>If the duration is not a precise number of ticks, it is truncated towards 0.</remarks> diff --git a/csharp/src/Google.Protobuf/WellKnownTypes/TimestampPartial.cs b/csharp/src/Google.Protobuf/WellKnownTypes/TimestampPartial.cs index 68584358..d284acd6 100644 --- a/csharp/src/Google.Protobuf/WellKnownTypes/TimestampPartial.cs +++ b/csharp/src/Google.Protobuf/WellKnownTypes/TimestampPartial.cs @@ -38,6 +38,8 @@ namespace Google.Protobuf.WellKnownTypes { private static readonly DateTime UnixEpoch = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc); private static readonly long BclSecondsAtUnixEpoch = UnixEpoch.Ticks / TimeSpan.TicksPerSecond; + internal static readonly long UnixSecondsAtBclMinValue = -BclSecondsAtUnixEpoch; + internal static readonly long UnixSecondsAtBclMaxValue = (DateTime.MaxValue.Ticks / TimeSpan.TicksPerSecond) - BclSecondsAtUnixEpoch; /// <summary> /// Returns the difference between one <see cref="Timestamp"/> and another, as a <see cref="Duration"/>. diff --git a/csharp/src/Google.Protobuf/WellKnownTypes/ValuePartial.cs b/csharp/src/Google.Protobuf/WellKnownTypes/ValuePartial.cs new file mode 100644 index 00000000..b4b6c05e --- /dev/null +++ b/csharp/src/Google.Protobuf/WellKnownTypes/ValuePartial.cs @@ -0,0 +1,99 @@ +#region Copyright notice and license +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#endregion + +namespace Google.Protobuf.WellKnownTypes +{ + public partial class Value + { + /// <summary> + /// Convenience method to create a Value message with a string value. + /// </summary> + /// <param name="value">Value to set for the StringValue property.</param> + /// <returns>A newly-created Value message with the given value.</returns> + public static Value ForString(string value) + { + Preconditions.CheckNotNull(value, "value"); + return new Value { StringValue = value }; + } + + /// <summary> + /// Convenience method to create a Value message with a number value. + /// </summary> + /// <param name="value">Value to set for the NumberValue property.</param> + /// <returns>A newly-created Value message with the given value.</returns> + public static Value ForNumber(double value) + { + return new Value { NumberValue = value }; + } + + /// <summary> + /// Convenience method to create a Value message with a Boolean value. + /// </summary> + /// <param name="value">Value to set for the BoolValue property.</param> + /// <returns>A newly-created Value message with the given value.</returns> + public static Value ForBool(bool value) + { + return new Value { BoolValue = value }; + } + + /// <summary> + /// Convenience method to create a Value message with a null initial value. + /// </summary> + /// <returns>A newly-created Value message a null initial value.</returns> + public static Value ForNull() + { + return new Value { NullValue = 0 }; + } + + /// <summary> + /// Convenience method to create a Value message with an initial list of values. + /// </summary> + /// <remarks>The values provided are not cloned; the references are copied directly.</remarks> + /// <returns>A newly-created Value message an initial list value.</returns> + public static Value ForList(params Value[] values) + { + Preconditions.CheckNotNull(values, "values"); + return new Value { ListValue = new ListValue { Values = { values } } }; + } + + /// <summary> + /// Convenience method to create a Value message with an initial struct value + /// </summary> + /// <remarks>The value provided is not cloned; the reference is copied directly.</remarks> + /// <returns>A newly-created Value message an initial struct value.</returns> + public static Value ForStruct(Struct value) + { + Preconditions.CheckNotNull(value, "value"); + return new Value { StructValue = value }; + } + } +} diff --git a/csharp/src/Google.Protobuf/WellKnownTypes/WrappersPartial.cs b/csharp/src/Google.Protobuf/WellKnownTypes/WrappersPartial.cs new file mode 100644 index 00000000..efc13a01 --- /dev/null +++ b/csharp/src/Google.Protobuf/WellKnownTypes/WrappersPartial.cs @@ -0,0 +1,42 @@ +#region Copyright notice and license +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#endregion + +namespace Google.Protobuf.WellKnownTypes +{ + public static partial class Wrappers + { + /// <summary> + /// Field number for the single "value" field in all wrapper types. + /// </summary> + internal const int WrapperValueFieldNumber = Int32Value.ValueFieldNumber; + } +} |