diff options
Diffstat (limited to 'src/google/protobuf/util/internal/json_stream_parser.h')
-rw-r--r-- | src/google/protobuf/util/internal/json_stream_parser.h | 256 |
1 files changed, 256 insertions, 0 deletions
diff --git a/src/google/protobuf/util/internal/json_stream_parser.h b/src/google/protobuf/util/internal/json_stream_parser.h new file mode 100644 index 00000000..17b094ae --- /dev/null +++ b/src/google/protobuf/util/internal/json_stream_parser.h @@ -0,0 +1,256 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef GOOGLE_PROTOBUF_UTIL_CONVERTER_JSON_STREAM_PARSER_H__ +#define GOOGLE_PROTOBUF_UTIL_CONVERTER_JSON_STREAM_PARSER_H__ + +#include <stack> +#include <string> + +#include <google/protobuf/stubs/common.h> +#include <google/protobuf/stubs/stringpiece.h> +#include <google/protobuf/stubs/status.h> + +namespace google { +namespace util { +class Status; +} // namespace util + +namespace protobuf { +namespace util { +namespace converter { + +class ObjectWriter; + +// A JSON parser that can parse a stream of JSON chunks rather than needing the +// entire JSON string up front. It is a modified version of the parser in +// //net/proto/json/json-parser.h that has been changed in the following ways: +// - Changed from recursion to an explicit stack to allow resumption +// - Added support for int64 and uint64 numbers +// - Removed support for octal and decimal escapes +// - Removed support for numeric keys +// - Removed support for functions (javascript) +// - Removed some lax-comma support (but kept trailing comma support) +// - Writes directly to an ObjectWriter rather than using subclassing +// +// Here is an example usage: +// JsonStreamParser parser(ow_.get()); +// util::Status result = parser.Parse(chunk1); +// result.Update(parser.Parse(chunk2)); +// result.Update(parser.FinishParse()); +// GOOGLE_DCHECK(result.ok()) << "Failed to parse JSON"; +// +// This parser is thread-compatible as long as only one thread is calling a +// Parse() method at a time. +class LIBPROTOBUF_EXPORT JsonStreamParser { + public: + // Creates a JsonStreamParser that will write to the given ObjectWriter. + explicit JsonStreamParser(ObjectWriter* ow); + virtual ~JsonStreamParser(); + + // Parse a JSON string (UTF-8 encoded). + util::Status Parse(StringPiece json); + + // Finish parsing the JSON string. + util::Status FinishParse(); + + private: + enum TokenType { + BEGIN_STRING, // " or ' + BEGIN_NUMBER, // - or digit + BEGIN_TRUE, // true + BEGIN_FALSE, // false + BEGIN_NULL, // null + BEGIN_OBJECT, // { + END_OBJECT, // } + BEGIN_ARRAY, // [ + END_ARRAY, // ] + ENTRY_SEPARATOR, // : + VALUE_SEPARATOR, // , + BEGIN_KEY, // letter, _, $ or digit. Must begin with non-digit + UNKNOWN // Unknown token or we ran out of the stream. + }; + + enum ParseType { + VALUE, // Expects a {, [, true, false, null, string or number + OBJ_MID, // Expects a ',' or } + ENTRY, // Expects a key or } + ENTRY_MID, // Expects a : + ARRAY_VALUE, // Expects a value or ] + ARRAY_MID // Expects a ',' or ] + }; + + // Holds the result of parsing a number + struct NumberResult { + enum Type { DOUBLE, INT, UINT }; + Type type; + union { + double double_val; + int64 int_val; + uint64 uint_val; + }; + }; + + // Parses a single chunk of JSON, returning an error if the JSON was invalid. + util::Status ParseChunk(StringPiece json); + + // Runs the parser based on stack_ and p_, until the stack is empty or p_ runs + // out of data. If we unexpectedly run out of p_ we push the latest back onto + // the stack and return. + util::Status RunParser(); + + // Parses a value from p_ and writes it to ow_. + // A value may be an object, array, true, false, null, string or number. + util::Status ParseValue(TokenType type); + + // Parses a string and writes it out to the ow_. + util::Status ParseString(); + + // Parses a string, storing the result in parsed_. + util::Status ParseStringHelper(); + + // This function parses unicode escape sequences in strings. It returns an + // error when there's a parsing error, either the size is not the expected + // size or a character is not a hex digit. When it returns str will contain + // what has been successfully parsed so far. + util::Status ParseUnicodeEscape(); + + // Expects p_ to point to a JSON number, writes the number to the writer using + // the appropriate Render method based on the type of number. + util::Status ParseNumber(); + + // Parse a number into a NumberResult, reporting an error if no number could + // be parsed. This method will try to parse into a uint64, int64, or double + // based on whether the number was positive or negative or had a decimal + // component. + util::Status ParseNumberHelper(NumberResult* result); + + // Handles a { during parsing of a value. + util::Status HandleBeginObject(); + + // Parses from the ENTRY state. + util::Status ParseEntry(TokenType type); + + // Parses from the ENTRY_MID state. + util::Status ParseEntryMid(TokenType type); + + // Parses from the OBJ_MID state. + util::Status ParseObjectMid(TokenType type); + + // Handles a [ during parsing of a value. + util::Status HandleBeginArray(); + + // Parses from the ARRAY_VALUE state. + util::Status ParseArrayValue(TokenType type); + + // Parses from the ARRAY_MID state. + util::Status ParseArrayMid(TokenType type); + + // Expects p_ to point to an unquoted literal + util::Status ParseTrue(); + util::Status ParseFalse(); + util::Status ParseNull(); + + // Report a failure as a util::Status. + util::Status ReportFailure(StringPiece message); + + // Report a failure due to an UNKNOWN token type. We check if we hit the + // end of the stream and if we're finishing or not to detect what type of + // status to return in this case. + util::Status ReportUnknown(StringPiece message); + + // Advance p_ past all whitespace or until the end of the string. + void SkipWhitespace(); + + // Advance p_ one UTF-8 character + void Advance(); + + // Expects p_ to point to the beginning of a key. + util::Status ParseKey(); + + // Return the type of the next token at p_. + TokenType GetNextTokenType(); + + // The object writer to write parse events to. + ObjectWriter* ow_; + + // The stack of parsing we still need to do. When the stack runs empty we will + // have parsed a single value from the root (e.g. an object or list). + std::stack<ParseType> stack_; + + // Contains any leftover text from a previous chunk that we weren't able to + // fully parse, for example the start of a key or number. + string leftover_; + + // The current chunk of JSON being parsed. Primarily used for providing + // context during error reporting. + StringPiece json_; + + // A pointer within the current JSON being parsed, used to track location. + StringPiece p_; + + // Stores the last key read, as we separate parsing of keys and values. + StringPiece key_; + + // Storage for key_ if we need to keep ownership, for example between chunks + // or if the key was unescaped from a JSON string. + string key_storage_; + + // True during the FinishParse() call, so we know that any errors are fatal. + // For example an unterminated string will normally result in cancelling and + // trying during the next chunk, but during FinishParse() it is an error. + bool finishing_; + + // String we parsed during a call to ParseStringHelper(). + StringPiece parsed_; + + // Storage for the string we parsed. This may be empty if the string was able + // to be parsed directly from the input. + string parsed_storage_; + + // The character that opened the string, either ' or ". + // A value of 0 indicates that string parsing is not in process. + char string_open_; + + // Storage for utf8-coerced bytes. + google::protobuf::scoped_array<char> utf8_storage_; + + // Length of the storage for utf8-coerced bytes. + int utf8_length_; + + GOOGLE_DISALLOW_IMPLICIT_CONSTRUCTORS(JsonStreamParser); +}; + +} // namespace converter +} // namespace util +} // namespace protobuf + +} // namespace google +#endif // GOOGLE_PROTOBUF_UTIL_CONVERTER_JSON_STREAM_PARSER_H__ |