diff options
Diffstat (limited to 'src/google/protobuf/io/tokenizer.h')
-rw-r--r-- | src/google/protobuf/io/tokenizer.h | 85 |
1 files changed, 78 insertions, 7 deletions
diff --git a/src/google/protobuf/io/tokenizer.h b/src/google/protobuf/io/tokenizer.h index 8f759abb..d85b82f9 100644 --- a/src/google/protobuf/io/tokenizer.h +++ b/src/google/protobuf/io/tokenizer.h @@ -38,6 +38,7 @@ #define GOOGLE_PROTOBUF_IO_TOKENIZER_H__ #include <string> +#include <vector> #include <google/protobuf/stubs/common.h> namespace google { @@ -137,6 +138,53 @@ class LIBPROTOBUF_EXPORT Tokenizer { // reached. bool Next(); + // Like Next(), but also collects comments which appear between the previous + // and next tokens. + // + // Comments which appear to be attached to the previous token are stored + // in *prev_tailing_comments. Comments which appear to be attached to the + // next token are stored in *next_leading_comments. Comments appearing in + // between which do not appear to be attached to either will be added to + // detached_comments. Any of these parameters can be NULL to simply discard + // the comments. + // + // A series of line comments appearing on consecutive lines, with no other + // tokens appearing on those lines, will be treated as a single comment. + // + // Only the comment content is returned; comment markers (e.g. //) are + // stripped out. For block comments, leading whitespace and an asterisk will + // be stripped from the beginning of each line other than the first. Newlines + // are included in the output. + // + // Examples: + // + // optional int32 foo = 1; // Comment attached to foo. + // // Comment attached to bar. + // optional int32 bar = 2; + // + // optional string baz = 3; + // // Comment attached to baz. + // // Another line attached to baz. + // + // // Comment attached to qux. + // // + // // Another line attached to qux. + // optional double qux = 4; + // + // // Detached comment. This is not attached to qux or corge + // // because there are blank lines separating it from both. + // + // optional string corge = 5; + // /* Block comment attached + // * to corge. Leading asterisks + // * will be removed. */ + // /* Block comment attached to + // * grault. */ + // optional int32 grault = 6; + bool NextWithComments(string* prev_trailing_comments, + vector<string>* detached_comments, + string* next_leading_comments); + // Parse helpers --------------------------------------------------- // Parses a TYPE_FLOAT token. This never fails, so long as the text actually @@ -200,11 +248,12 @@ class LIBPROTOBUF_EXPORT Tokenizer { int line_; int column_; - // Position in buffer_ where StartToken() was called. If the token - // started in the previous buffer, this is zero, and current_.text already - // contains the part of the token from the previous buffer. If not - // currently parsing a token, this is -1. - int token_start_; + // String to which text should be appended as we advance through it. + // Call RecordTo(&str) to start recording and StopRecording() to stop. + // E.g. StartToken() calls RecordTo(¤t_.text). record_start_ is the + // position within the current buffer where recording started. + string* record_target_; + int record_start_; // Options. bool allow_f_after_float_; @@ -223,6 +272,9 @@ class LIBPROTOBUF_EXPORT Tokenizer { // Read a new buffer from the input. void Refresh(); + inline void RecordTo(string* target); + inline void StopRecording(); + // Called when the current character is the first character of a new // token (not including whitespace or comments). inline void StartToken(); @@ -255,9 +307,28 @@ class LIBPROTOBUF_EXPORT Tokenizer { TokenType ConsumeNumber(bool started_with_zero, bool started_with_dot); // Consume the rest of a line. - void ConsumeLineComment(); + void ConsumeLineComment(string* content); // Consume until "*/". - void ConsumeBlockComment(); + void ConsumeBlockComment(string* content); + + enum NextCommentStatus { + // Started a line comment. + LINE_COMMENT, + + // Started a block comment. + BLOCK_COMMENT, + + // Consumed a slash, then realized it wasn't a comment. current_ has + // been filled in with a slash token. The caller should return it. + SLASH_NOT_COMMENT, + + // We do not appear to be starting a comment here. + NO_COMMENT + }; + + // If we're at the start of a new comment, consume it and return what kind + // of comment it is. + NextCommentStatus TryConsumeCommentStart(); // ----------------------------------------------------------------- // These helper methods make the parsing code more readable. The |