From 3d257a9dc1427acf163603cea95fb015e839bd2b Mon Sep 17 00:00:00 2001
From: Jon Skeet
Date: Wed, 4 Nov 2015 09:28:28 +0000
Subject: Add recursion limit handling to JSON parsing.

Fixes issue #932.
---
 .../src/Google.Protobuf.Test/CodedInputStreamTest.cs |  4 ++--
 csharp/src/Google.Protobuf.Test/JsonParserTest.cs    | 18 ++++++++++++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)

(limited to 'csharp/src/Google.Protobuf.Test')

diff --git a/csharp/src/Google.Protobuf.Test/CodedInputStreamTest.cs b/csharp/src/Google.Protobuf.Test/CodedInputStreamTest.cs
index 54c44e47..6ae02112 100644
--- a/csharp/src/Google.Protobuf.Test/CodedInputStreamTest.cs
+++ b/csharp/src/Google.Protobuf.Test/CodedInputStreamTest.cs
@@ -284,7 +284,7 @@ namespace Google.Protobuf
             Assert.Throws<InvalidProtocolBufferException>(() => input.ReadBytes());
         }
 
-        private static TestRecursiveMessage MakeRecursiveMessage(int depth)
+        internal static TestRecursiveMessage MakeRecursiveMessage(int depth)
         {
             if (depth == 0)
             {
@@ -296,7 +296,7 @@ namespace Google.Protobuf
             }
         }
 
-        private static void AssertMessageDepth(TestRecursiveMessage message, int depth)
+        internal static void AssertMessageDepth(TestRecursiveMessage message, int depth)
        {
             if (depth == 0)
             {
diff --git a/csharp/src/Google.Protobuf.Test/JsonParserTest.cs b/csharp/src/Google.Protobuf.Test/JsonParserTest.cs
index b1c7b46c..cb138f53 100644
--- a/csharp/src/Google.Protobuf.Test/JsonParserTest.cs
+++ b/csharp/src/Google.Protobuf.Test/JsonParserTest.cs
@@ -723,5 +723,23 @@ namespace Google.Protobuf
             string json = "{} 10";
             Assert.Throws<InvalidProtocolBufferException>(() => TestAllTypes.Parser.ParseJson(json));
         }
+
+        /// <summary>
+        /// JSON equivalent to <see cref="CodedInputStreamTest.MaliciousRecursion"/>
+        /// </summary>
+        [Test]
+        public void MaliciousRecursion()
+        {
+            string data64 = CodedInputStreamTest.MakeRecursiveMessage(64).ToString();
+            string data65 = CodedInputStreamTest.MakeRecursiveMessage(65).ToString();
+
+            var parser64 = new JsonParser(new JsonParser.Settings(64));
+            CodedInputStreamTest.AssertMessageDepth(parser64.Parse<TestRecursiveMessage>(data64), 64);
+            Assert.Throws<InvalidProtocolBufferException>(() => parser64.Parse<TestRecursiveMessage>(data65));
+
+            var parser63 = new JsonParser(new JsonParser.Settings(63));
+            Assert.Throws<InvalidProtocolBufferException>(() => parser63.Parse<TestRecursiveMessage>(data64));
+
+        }
     }
 }
--
cgit v1.2.3


From 6fa17e759737e3225c6cc4ba830b921428c50781 Mon Sep 17 00:00:00 2001
From: Jon Skeet
Date: Thu, 5 Nov 2015 19:44:26 +0000
Subject: Reimplement JSON recursion by detecting the depth in the tokenizer.

Added a TODO around a possible change to the tokenizer API, changing
PushBack(token) into just Rewind() or something similar.
---
 .../src/Google.Protobuf.Test/JsonTokenizerTest.cs | 57 ++++++++++++++++++
 .../InvalidProtocolBufferException.cs             |  7 +++
 csharp/src/Google.Protobuf/JsonParser.cs          | 67 +++++++++++-----------
 csharp/src/Google.Protobuf/JsonTokenizer.cs       | 27 +++++++++
 4 files changed, 124 insertions(+), 34 deletions(-)

(limited to 'csharp/src/Google.Protobuf.Test')

diff --git a/csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs b/csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs
index 868d9f75..5ca5d721 100644
--- a/csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs
+++ b/csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs
@@ -81,6 +81,63 @@ namespace Google.Protobuf
             AssertTokens("'\ud800\\udc00'", JsonToken.Value(expected));
         }
 
+        [Test]
+        public void ObjectDepth()
+        {
+            string json = "{ \"foo\": { \"x\": 1, \"y\": [ 0 ] } }";
+            var tokenizer = new JsonTokenizer(new StringReader(json));
+            // If we had more tests like this, I'd introduce a helper method... but for one test, it's not worth it.
+            Assert.AreEqual(0, tokenizer.ObjectDepth);
+            Assert.AreEqual(JsonToken.StartObject, tokenizer.Next());
+            Assert.AreEqual(1, tokenizer.ObjectDepth);
+            Assert.AreEqual(JsonToken.Name("foo"), tokenizer.Next());
+            Assert.AreEqual(1, tokenizer.ObjectDepth);
+            Assert.AreEqual(JsonToken.StartObject, tokenizer.Next());
+            Assert.AreEqual(2, tokenizer.ObjectDepth);
+            Assert.AreEqual(JsonToken.Name("x"), tokenizer.Next());
+            Assert.AreEqual(2, tokenizer.ObjectDepth);
+            Assert.AreEqual(JsonToken.Value(1), tokenizer.Next());
+            Assert.AreEqual(2, tokenizer.ObjectDepth);
+            Assert.AreEqual(JsonToken.Name("y"), tokenizer.Next());
+            Assert.AreEqual(2, tokenizer.ObjectDepth);
+            Assert.AreEqual(JsonToken.StartArray, tokenizer.Next());
+            Assert.AreEqual(2, tokenizer.ObjectDepth); // Depth hasn't changed in array
+            Assert.AreEqual(JsonToken.Value(0), tokenizer.Next());
+            Assert.AreEqual(2, tokenizer.ObjectDepth);
+            Assert.AreEqual(JsonToken.EndArray, tokenizer.Next());
+            Assert.AreEqual(2, tokenizer.ObjectDepth);
+            Assert.AreEqual(JsonToken.EndObject, tokenizer.Next());
+            Assert.AreEqual(1, tokenizer.ObjectDepth);
+            Assert.AreEqual(JsonToken.EndObject, tokenizer.Next());
+            Assert.AreEqual(0, tokenizer.ObjectDepth);
+            Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next());
+            Assert.AreEqual(0, tokenizer.ObjectDepth);
+        }
+
+        [Test]
+        public void ObjectDepth_WithPushBack()
+        {
+            string json = "{}";
+            var tokenizer = new JsonTokenizer(new StringReader(json));
+            Assert.AreEqual(0, tokenizer.ObjectDepth);
+            var token = tokenizer.Next();
+            Assert.AreEqual(1, tokenizer.ObjectDepth);
+            // When we push back a "start object", we should effectively be back to the previous depth.
+            tokenizer.PushBack(token);
+            Assert.AreEqual(0, tokenizer.ObjectDepth);
+            // Read the same token again, and get back to depth 1
+            token = tokenizer.Next();
+            Assert.AreEqual(1, tokenizer.ObjectDepth);
+
+            // Now the same in reverse, with EndObject
+            token = tokenizer.Next();
+            Assert.AreEqual(0, tokenizer.ObjectDepth);
+            tokenizer.PushBack(token);
+            Assert.AreEqual(1, tokenizer.ObjectDepth);
+            tokenizer.Next();
+            Assert.AreEqual(0, tokenizer.ObjectDepth);
+        }
+
         [Test]
         [TestCase("embedded tab\t")]
         [TestCase("embedded CR\r")]
diff --git a/csharp/src/Google.Protobuf/InvalidProtocolBufferException.cs b/csharp/src/Google.Protobuf/InvalidProtocolBufferException.cs
index 01d55395..cacda648 100644
--- a/csharp/src/Google.Protobuf/InvalidProtocolBufferException.cs
+++ b/csharp/src/Google.Protobuf/InvalidProtocolBufferException.cs
@@ -95,6 +95,13 @@ namespace Google.Protobuf
                 "Use CodedInputStream.SetRecursionLimit() to increase the depth limit.");
         }
 
+        internal static InvalidProtocolBufferException JsonRecursionLimitExceeded()
+        {
+            return new InvalidProtocolBufferException(
+                "Protocol message had too many levels of nesting. May be malicious. " +
+                "Use JsonParser.Settings to increase the depth limit.");
+        }
+
         internal static InvalidProtocolBufferException SizeLimitExceeded()
         {
             return new InvalidProtocolBufferException(
diff --git a/csharp/src/Google.Protobuf/JsonParser.cs b/csharp/src/Google.Protobuf/JsonParser.cs
index 2ab8c110..4cbbd89d 100644
--- a/csharp/src/Google.Protobuf/JsonParser.cs
+++ b/csharp/src/Google.Protobuf/JsonParser.cs
@@ -69,16 +69,16 @@ namespace Google.Protobuf
         // TODO: Consider introducing a class containing parse state of the parser, tokenizer and depth. That would simplify these handlers
         // and the signatures of various methods.
-        private static readonly Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer, int>>
-            WellKnownTypeHandlers = new Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer, int>>
+        private static readonly Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>>
+            WellKnownTypeHandlers = new Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>>
         {
-            { Timestamp.Descriptor.FullName, (parser, message, tokenizer, depth) => MergeTimestamp(message, tokenizer.Next()) },
-            { Duration.Descriptor.FullName, (parser, message, tokenizer, depth) => MergeDuration(message, tokenizer.Next()) },
-            { Value.Descriptor.FullName, (parser, message, tokenizer, depth) => parser.MergeStructValue(message, tokenizer, depth) },
-            { ListValue.Descriptor.FullName, (parser, message, tokenizer, depth) =>
-                parser.MergeRepeatedField(message, message.Descriptor.Fields[ListValue.ValuesFieldNumber], tokenizer, depth) },
-            { Struct.Descriptor.FullName, (parser, message, tokenizer, depth) => parser.MergeStruct(message, tokenizer, depth) },
-            { FieldMask.Descriptor.FullName, (parser, message, tokenizer, depth) => MergeFieldMask(message, tokenizer.Next()) },
+            { Timestamp.Descriptor.FullName, (parser, message, tokenizer) => MergeTimestamp(message, tokenizer.Next()) },
+            { Duration.Descriptor.FullName, (parser, message, tokenizer) => MergeDuration(message, tokenizer.Next()) },
+            { Value.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeStructValue(message, tokenizer) },
+            { ListValue.Descriptor.FullName, (parser, message, tokenizer) =>
+                parser.MergeRepeatedField(message, message.Descriptor.Fields[ListValue.ValuesFieldNumber], tokenizer) },
+            { Struct.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeStruct(message, tokenizer) },
+            { FieldMask.Descriptor.FullName, (parser, message, tokenizer) => MergeFieldMask(message, tokenizer.Next()) },
             { Int32Value.Descriptor.FullName, MergeWrapperField },
             { Int64Value.Descriptor.FullName, MergeWrapperField },
             { UInt32Value.Descriptor.FullName, MergeWrapperField },
@@ -91,9 +91,9 @@ namespace Google.Protobuf
 
         // Convenience method to avoid having to repeat the same code multiple times in the above
         // dictionary initialization.
-        private static void MergeWrapperField(JsonParser parser, IMessage message, JsonTokenizer tokenizer, int depth)
+        private static void MergeWrapperField(JsonParser parser, IMessage message, JsonTokenizer tokenizer)
         {
-            parser.MergeField(message, message.Descriptor.Fields[Wrappers.WrapperValueFieldNumber], tokenizer, depth);
+            parser.MergeField(message, message.Descriptor.Fields[Wrappers.WrapperValueFieldNumber], tokenizer);
         }
 
         /// <summary>
@@ -130,7 +130,7 @@ namespace Google.Protobuf
         internal void Merge(IMessage message, TextReader jsonReader)
         {
             var tokenizer = new JsonTokenizer(jsonReader);
-            Merge(message, tokenizer, 0);
+            Merge(message, tokenizer);
             var lastToken = tokenizer.Next();
             if (lastToken != JsonToken.EndDocument)
             {
@@ -145,19 +145,18 @@ namespace Google.Protobuf
         /// of tokens provided by the tokenizer. This token stream is assumed to be valid JSON, with the
         /// tokenizer performing that validation - but not every token stream is valid "protobuf JSON".
         /// </summary>
-        private void Merge(IMessage message, JsonTokenizer tokenizer, int depth)
+        private void Merge(IMessage message, JsonTokenizer tokenizer)
         {
-            if (depth > settings.RecursionLimit)
+            if (tokenizer.ObjectDepth > settings.RecursionLimit)
             {
-                throw InvalidProtocolBufferException.RecursionLimitExceeded();
+                throw InvalidProtocolBufferException.JsonRecursionLimitExceeded();
             }
-            depth++;
             if (message.Descriptor.IsWellKnownType)
             {
-                Action<JsonParser, IMessage, JsonTokenizer, int> handler;
+                Action<JsonParser, IMessage, JsonTokenizer> handler;
                 if (WellKnownTypeHandlers.TryGetValue(message.Descriptor.FullName, out handler))
                 {
-                    handler(this, message, tokenizer, depth);
+                    handler(this, message, tokenizer);
                     return;
                 }
                 // Well-known types with no special handling continue in the normal way.
@@ -188,7 +187,7 @@ namespace Google.Protobuf
                 FieldDescriptor field;
                 if (jsonFieldMap.TryGetValue(name, out field))
                 {
-                    MergeField(message, field, tokenizer, depth);
+                    MergeField(message, field, tokenizer);
                 }
                 else
                 {
@@ -200,7 +199,7 @@ namespace Google.Protobuf
             }
         }
 
-        private void MergeField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer, int depth)
+        private void MergeField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
         {
             var token = tokenizer.Next();
             if (token.Type == JsonToken.TokenType.Null)
@@ -214,20 +213,20 @@ namespace Google.Protobuf
 
             if (field.IsMap)
             {
-                MergeMapField(message, field, tokenizer, depth);
+                MergeMapField(message, field, tokenizer);
             }
             else if (field.IsRepeated)
             {
-                MergeRepeatedField(message, field, tokenizer, depth);
+                MergeRepeatedField(message, field, tokenizer);
             }
             else
             {
-                var value = ParseSingleValue(field, tokenizer, depth);
+                var value = ParseSingleValue(field, tokenizer);
                 field.Accessor.SetValue(message, value);
             }
         }
 
-        private void MergeRepeatedField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer, int depth)
+        private void MergeRepeatedField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
         {
             var token = tokenizer.Next();
             if (token.Type != JsonToken.TokenType.StartArray)
@@ -244,11 +243,11 @@ namespace Google.Protobuf
                     return;
                 }
                 tokenizer.PushBack(token);
-                list.Add(ParseSingleValue(field, tokenizer, depth));
+                list.Add(ParseSingleValue(field, tokenizer));
             }
         }
 
-        private void MergeMapField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer, int depth)
+        private void MergeMapField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
        {
             // Map fields are always objects, even if the values are well-known types: ParseSingleValue handles those.
             var token = tokenizer.Next();
@@ -274,13 +273,13 @@ namespace Google.Protobuf
                     return;
                 }
                 object key = ParseMapKey(keyField, token.StringValue);
-                object value = ParseSingleValue(valueField, tokenizer, depth);
+                object value = ParseSingleValue(valueField, tokenizer);
                 // TODO: Null handling
                 dictionary[key] = value;
             }
         }
 
-        private object ParseSingleValue(FieldDescriptor field, JsonTokenizer tokenizer, int depth)
+        private object ParseSingleValue(FieldDescriptor field, JsonTokenizer tokenizer)
         {
             var token = tokenizer.Next();
             if (token.Type == JsonToken.TokenType.Null)
@@ -308,7 +307,7 @@ namespace Google.Protobuf
                 // TODO: Merge the current value in message? (Public API currently doesn't make this relevant as we don't expose merging.)
                 tokenizer.PushBack(token);
                 IMessage subMessage = NewMessageForField(field);
-                Merge(subMessage, tokenizer, depth);
+                Merge(subMessage, tokenizer);
                 return subMessage;
             }
         }
@@ -358,7 +357,7 @@ namespace Google.Protobuf
             return message;
         }
 
-        private void MergeStructValue(IMessage message, JsonTokenizer tokenizer, int depth)
+        private void MergeStructValue(IMessage message, JsonTokenizer tokenizer)
         {
             var firstToken = tokenizer.Next();
             var fields = message.Descriptor.Fields;
@@ -382,7 +381,7 @@ namespace Google.Protobuf
                     var field = fields[Value.StructValueFieldNumber];
                     var structMessage = NewMessageForField(field);
                     tokenizer.PushBack(firstToken);
-                    Merge(structMessage, tokenizer, depth);
+                    Merge(structMessage, tokenizer);
                     field.Accessor.SetValue(message, structMessage);
                     return;
                 }
@@ -391,7 +390,7 @@ namespace Google.Protobuf
                     var field = fields[Value.ListValueFieldNumber];
                     var list = NewMessageForField(field);
                     tokenizer.PushBack(firstToken);
-                    Merge(list, tokenizer, depth);
+                    Merge(list, tokenizer);
                     field.Accessor.SetValue(message, list);
                     return;
                 }
@@ -400,7 +399,7 @@ namespace Google.Protobuf
             }
         }
 
-        private void MergeStruct(IMessage message, JsonTokenizer tokenizer, int depth)
+        private void MergeStruct(IMessage message, JsonTokenizer tokenizer)
        {
             var token = tokenizer.Next();
             if (token.Type != JsonToken.TokenType.StartObject)
@@ -410,7 +409,7 @@ namespace Google.Protobuf
 
             tokenizer.PushBack(token);
             var field = message.Descriptor.Fields[Struct.FieldsFieldNumber];
-            MergeMapField(message, field, tokenizer, depth);
+            MergeMapField(message, field, tokenizer);
         }
 
         #region Utility methods which don't depend on the state (or settings) of the parser.
diff --git a/csharp/src/Google.Protobuf/JsonTokenizer.cs b/csharp/src/Google.Protobuf/JsonTokenizer.cs
index 108ccebe..ba031886 100644
--- a/csharp/src/Google.Protobuf/JsonTokenizer.cs
+++ b/csharp/src/Google.Protobuf/JsonTokenizer.cs
@@ -58,6 +58,13 @@ namespace Google.Protobuf
         private readonly PushBackReader reader;
         private JsonToken bufferedToken;
         private State state;
+        private int objectDepth = 0;
+
+        /// <summary>
+        /// Returns the depth of the stack, purely in objects (not collections).
+        /// Informally, this is the number of remaining unclosed '{' characters we have.
+        /// </summary>
+        internal int ObjectDepth { get { return objectDepth; } }
 
         internal JsonTokenizer(TextReader reader)
         {
@@ -66,6 +73,8 @@ namespace Google.Protobuf
             containerStack.Push(ContainerType.Document);
         }
 
+        // TODO: Why do we allow a different token to be pushed back? It might be better to always remember the previous
+        // token returned, and allow a parameterless Rewind() method (which could only be called once, just like the current PushBack).
         internal void PushBack(JsonToken token)
         {
             if (bufferedToken != null)
@@ -73,6 +82,14 @@ namespace Google.Protobuf
             {
                 throw new InvalidOperationException("Can't push back twice");
             }
             bufferedToken = token;
+            if (token.Type == JsonToken.TokenType.StartObject)
+            {
+                objectDepth--;
+            }
+            else if (token.Type == JsonToken.TokenType.EndObject)
+            {
+                objectDepth++;
+            }
         }
 
         /// <summary>
@@ -94,6 +111,14 @@ namespace Google.Protobuf
             {
                 var ret = bufferedToken;
                 bufferedToken = null;
+                if (ret.Type == JsonToken.TokenType.StartObject)
+                {
+                    objectDepth++;
+                }
+                else if (ret.Type == JsonToken.TokenType.EndObject)
+                {
+                    objectDepth--;
+                }
                 return ret;
             }
             if (state == State.ReaderExhausted)
@@ -141,10 +166,12 @@ namespace Google.Protobuf
                     ValidateState(ValueStates, "Invalid state to read an open brace: ");
                     state = State.ObjectStart;
                     containerStack.Push(ContainerType.Object);
+                    objectDepth++;
                     return JsonToken.StartObject;
                 case '}':
                     ValidateState(State.ObjectAfterProperty | State.ObjectStart, "Invalid state to read a close brace: ");
                     PopContainer();
+                    objectDepth--;
                    return JsonToken.EndObject;
                 case '[':
                     ValidateState(ValueStates, "Invalid state to read an open square bracket: ");
--
cgit v1.2.3
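
For context on how a caller consumes the limit these commits add, here is a minimal sketch, not part of the patches above. Only JsonParser, JsonParser.Settings(int), Parse<T>(string) and InvalidProtocolBufferException appear in the diffs; TestRecursiveMessage (fields "i" and "a", as implied by MakeRecursiveMessage in the tests) and the Google.Protobuf.TestProtos namespace are assumptions taken from the repository's test protos.

    using System;
    using System.Text;
    using Google.Protobuf;
    using Google.Protobuf.TestProtos; // assumed namespace of the generated test messages

    class RecursionLimitDemo
    {
        static void Main()
        {
            // Build JSON for an innermost message wrapped in 40 outer objects:
            // { "a": { "a": ... { "i": 5 } ... } } - the shape MakeRecursiveMessage(40).ToString()
            // produces in the tests above.
            var builder = new StringBuilder();
            for (int i = 0; i < 40; i++)
            {
                builder.Append("{ \"a\": ");
            }
            builder.Append("{ \"i\": 5 }");
            builder.Append('}', 40);

            // The tests above construct parsers with explicit limits of 63 and 64;
            // here we allow at most 32 nested objects, so this input must be rejected.
            var parser = new JsonParser(new JsonParser.Settings(32));
            try
            {
                var message = parser.Parse<TestRecursiveMessage>(builder.ToString());
                Console.WriteLine("Parsed OK: " + message);
            }
            catch (InvalidProtocolBufferException e)
            {
                // Thrown via JsonRecursionLimitExceeded() once the tokenizer's ObjectDepth
                // exceeds the configured limit.
                Console.WriteLine("Rejected: " + e.Message);
            }
        }
    }

Because the second commit moves the depth check onto the tokenizer's ObjectDepth instead of threading a depth parameter through every Merge* method, the same limit automatically covers the well-known-type handlers (Struct, Value, ListValue) as well.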