// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.
// http://code.google.com/p/protobuf/
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
using System;
using System.Globalization;
using System.Text.RegularExpressions;

namespace Google.ProtocolBuffers {
  /// <summary>
  /// Represents a stream of tokens parsed from a string.
  /// </summary>
  /// <remarks>
  /// The tokenizer always holds one "current" token. The Consume* and
  /// TryConsume methods inspect that token and, on success, advance to the
  /// following one. An empty current token means the end of the input.
  /// </remarks>
  internal sealed class TextTokenizer {
    private readonly string text;
    private string currentToken;

    /// <summary>
    /// The character index within the text to perform the next regex match at.
    /// </summary>
    private int matchPos = 0;

    /// <summary>
    /// The character index within the text at which the current token begins.
    /// </summary>
    private int pos = 0;

    /// <summary>
    /// The (zero-based) line number of the current token.
    /// </summary>
    private int line = 0;
    /// <summary>
    /// The (zero-based) column number of the current token.
    /// </summary>
    private int column = 0;

    /// <summary>
    /// The (zero-based) line number of the previous token.
    /// </summary>
    private int previousLine = 0;
    /// <summary>
    /// The (zero-based) column number of the previous token.
    /// </summary>
    private int previousColumn = 0;

    // All patterns are anchored with \G so that they only match exactly at
    // the start position handed to Regex.Match. Multiline makes '$' match at
    // the end of every line, which terminates comments and unterminated
    // string literals at the line break.
    private static readonly Regex WhitespaceAndCommentPattern = new Regex("\\G(\\s|(#.*$))+",
        RegexOptions.Compiled | RegexOptions.Multiline);
    private static readonly Regex TokenPattern = new Regex(
        "\\G[a-zA-Z_][0-9a-zA-Z_+-]*|" +              // an identifier
        "\\G[0-9+-][0-9a-zA-Z_.+-]*|" +               // a number
        "\\G\"([^\"\\\n\\\\]|\\\\.)*(\"|\\\\?$)|" +   // a double-quoted string
        // The body of a single-quoted string must exclude the single quote
        // itself; otherwise the closing quote is swallowed and two strings on
        // one line are merged into a single token.
        "\\G\'([^\'\\\n\\\\]|\\\\.)*(\'|\\\\?$)",     // a single-quoted string
        RegexOptions.Compiled | RegexOptions.Multiline);

    private static readonly Regex DoubleInfinity = new Regex("^-?inf(inity)?$", RegexOptions.Compiled | RegexOptions.IgnoreCase);
    private static readonly Regex FloatInfinity = new Regex("^-?inf(inity)?f?$", RegexOptions.Compiled | RegexOptions.IgnoreCase);
    private static readonly Regex FloatNan = new Regex("^nanf?$", RegexOptions.Compiled | RegexOptions.IgnoreCase);

    /// <summary>
    /// Constructs a tokenizer that parses tokens from the given text and
    /// positions it on the first token.
    /// </summary>
    /// <param name="text">The text to tokenize; must not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
    public TextTokenizer(string text) {
      if (text == null) {
        throw new ArgumentNullException("text");
      }
      this.text = text;
      SkipWhitespace();
      NextToken();
    }

    /// <summary>
    /// Are we at the end of the input?
    /// </summary>
    public bool AtEnd {
      get { return currentToken.Length == 0; }
    }

    /// <summary>
    /// Advances to the next token.
    /// </summary>
    public void NextToken() {
      previousLine = line;
      previousColumn = column;

      // Advance the line counter to the current position.
      while (pos < matchPos) {
        if (text[pos] == '\n') {
          ++line;
          column = 0;
        } else {
          ++column;
        }
        ++pos;
      }

      // Match the next token.
      if (matchPos == text.Length) {
        // EOF
        currentToken = "";
      } else {
        Match match = TokenPattern.Match(text, matchPos);
        if (match.Success) {
          currentToken = match.Value;
          matchPos += match.Length;
        } else {
          // Not an identifier, number or string: take one character
          // (punctuation such as '{', ':' or '<') as the token.
          currentToken = text[matchPos].ToString();
          matchPos++;
        }

        SkipWhitespace();
      }
    }

    /// <summary>
    /// Skip over any whitespace and comments so that matchPos starts at the
    /// next token.
    /// </summary>
    private void SkipWhitespace() {
      Match match = WhitespaceAndCommentPattern.Match(text, matchPos);
      if (match.Success) {
        matchPos += match.Length;
      }
    }

    /// <summary>
    /// If the next token exactly matches the given token, consume it and return
    /// true. Otherwise, return false without doing anything.
    /// </summary>
    public bool TryConsume(string token) {
      if (currentToken == token) {
        NextToken();
        return true;
      }
      return false;
    }

    /// <summary>
    /// If the next token exactly matches the specified one, consume it.
    /// Otherwise, throw a FormatException.
    /// </summary>
    /// <param name="token">The exact token text that is expected.</param>
    public void Consume(string token) {
      if (!TryConsume(token)) {
        throw CreateFormatException("Expected \"" + token + "\".");
      }
    }

    /// <summary>
    /// Returns true if the next token is an integer, but does not consume it.
    /// Only the first character of the token is examined.
    /// </summary>
    public bool LookingAtInteger() {
      if (currentToken.Length == 0) {
        return false;
      }

      char c = currentToken[0];
      return ('0' <= c && c <= '9') || c == '-' || c == '+';
    }

    /// <summary>
    /// If the next token is an identifier, consume it and return its value.
    /// Otherwise, throw a FormatException.
    /// </summary>
    public string ConsumeIdentifier() {
      foreach (char c in currentToken) {
        if (('a' <= c && c <= 'z') ||
            ('A' <= c && c <= 'Z') ||
            ('0' <= c && c <= '9') ||
            (c == '_') || (c == '.')) {
          // OK
        } else {
          throw CreateFormatException("Expected identifier.");
        }
      }

      string result = currentToken;
      NextToken();
      return result;
    }

    /// <summary>
    /// If the next token is a 32-bit signed integer, consume it and return its
    /// value. Otherwise, throw a FormatException.
    /// </summary>
    public int ConsumeInt32() {
      try {
        int result = TextFormat.ParseInt32(currentToken);
        NextToken();
        return result;
      } catch (FormatException e) {
        throw CreateIntegerParseException(e);
      }
    }

    /// <summary>
    /// If the next token is a 32-bit unsigned integer, consume it and return its
    /// value. Otherwise, throw a FormatException.
    /// </summary>
    public uint ConsumeUInt32() {
      try {
        uint result = TextFormat.ParseUInt32(currentToken);
        NextToken();
        return result;
      } catch (FormatException e) {
        throw CreateIntegerParseException(e);
      }
    }

    /// <summary>
    /// If the next token is a 64-bit signed integer, consume it and return its
    /// value. Otherwise, throw a FormatException.
    /// </summary>
    public long ConsumeInt64() {
      try {
        long result = TextFormat.ParseInt64(currentToken);
        NextToken();
        return result;
      } catch (FormatException e) {
        throw CreateIntegerParseException(e);
      }
    }

    /// <summary>
    /// If the next token is a 64-bit unsigned integer, consume it and return its
    /// value. Otherwise, throw a FormatException.
    /// </summary>
    public ulong ConsumeUInt64() {
      try {
        ulong result = TextFormat.ParseUInt64(currentToken);
        NextToken();
        return result;
      } catch (FormatException e) {
        throw CreateIntegerParseException(e);
      }
    }

    /// <summary>
    /// If the next token is a double, consume it and return its value.
    /// Otherwise, throw a FormatException.
    /// </summary>
    public double ConsumeDouble() {
      // We need to parse infinity and nan separately because
      // double.Parse() does not accept "inf", "infinity", or "nan".
      if (DoubleInfinity.IsMatch(currentToken)) {
        // Tokens are machine-readable text, so compare ordinally.
        bool negative = currentToken.StartsWith("-", StringComparison.Ordinal);
        NextToken();
        return negative ? double.NegativeInfinity : double.PositiveInfinity;
      }
      if (currentToken.Equals("nan", StringComparison.OrdinalIgnoreCase)) {
        NextToken();
        return Double.NaN;
      }

      try {
        double result = double.Parse(currentToken, CultureInfo.InvariantCulture);
        NextToken();
        return result;
      } catch (FormatException e) {
        throw CreateFloatParseException(e);
      } catch (OverflowException e) {
        throw CreateFloatParseException(e);
      }
    }

    /// <summary>
    /// If the next token is a float, consume it and return its value.
    /// Otherwise, throw a FormatException.
    /// </summary>
    public float ConsumeFloat() {
      // We need to parse infinity and nan separately because
      // float.Parse() does not accept "inf", "infinity", or "nan".
      if (FloatInfinity.IsMatch(currentToken)) {
        bool negative = currentToken.StartsWith("-", StringComparison.Ordinal);
        NextToken();
        return negative ? float.NegativeInfinity : float.PositiveInfinity;
      }
      if (FloatNan.IsMatch(currentToken)) {
        NextToken();
        return float.NaN;
      }

      // float.Parse() does not understand the optional 'f' suffix. Strip a
      // single suffix from a local copy so that the current token is left
      // untouched if parsing fails.
      string number = currentToken;
      if (number.EndsWith("f", StringComparison.Ordinal)) {
        number = number.Substring(0, number.Length - 1);
      }

      try {
        float result = float.Parse(number, CultureInfo.InvariantCulture);
        NextToken();
        return result;
      } catch (FormatException e) {
        throw CreateFloatParseException(e);
      } catch (OverflowException e) {
        throw CreateFloatParseException(e);
      }
    }

    /// <summary>
    /// If the next token is a Boolean, consume it and return its value.
    /// Otherwise, throw a FormatException.
    /// </summary>
    public bool ConsumeBoolean() {
      if (currentToken == "true") {
        NextToken();
        return true;
      }
      if (currentToken == "false") {
        NextToken();
        return false;
      }
      throw CreateFormatException("Expected \"true\" or \"false\".");
    }

    /// <summary>
    /// If the next token is a string, consume it and return its (unescaped) value.
    /// Otherwise, throw a FormatException.
    /// </summary>
    public string ConsumeString() {
      return ConsumeByteString().ToStringUtf8();
    }

    /// <summary>
    /// If the next token is a string, consume it, unescape it as a
    /// ByteString and return it. Otherwise, throw a FormatException.
    /// </summary>
    public ByteString ConsumeByteString() {
      char quote = currentToken.Length > 0 ? currentToken[0] : '\0';
      if (quote != '\"' && quote != '\'') {
        throw CreateFormatException("Expected string.");
      }

      // The token pattern also matches string literals that run to the end of
      // the line without a closing quote; reject those here.
      if (currentToken.Length < 2 ||
          currentToken[currentToken.Length - 1] != quote) {
        throw CreateFormatException("String missing ending quote.");
      }

      try {
        string escaped = currentToken.Substring(1, currentToken.Length - 2);
        ByteString result = TextFormat.UnescapeBytes(escaped);
        NextToken();
        return result;
      } catch (FormatException e) {
        // Re-create the exception so that the message carries the position.
        throw CreateFormatException(e.Message);
      }
    }

    /// <summary>
    /// Returns a format exception with the current line and column numbers
    /// in the description, suitable for throwing.
    /// </summary>
    public FormatException CreateFormatException(string description) {
      // Note: People generally prefer one-based line and column numbers.
      return new FormatException((line + 1) + ":" + (column + 1) + ": " + description);
    }

    /// <summary>
    /// Returns a format exception with the line and column numbers of the
    /// previous token in the description, suitable for throwing.
    /// </summary>
    public FormatException CreateFormatExceptionPreviousToken(string description) {
      // Note: People generally prefer one-based line and column numbers.
      return new FormatException((previousLine + 1) + ":" + (previousColumn + 1) + ": " + description);
    }

    /// <summary>
    /// Constructs an appropriate FormatException for the given existing exception
    /// when trying to parse an integer.
    /// </summary>
    private FormatException CreateIntegerParseException(FormatException e) {
      return CreateFormatException("Couldn't parse integer: " + e.Message);
    }

    /// <summary>
    /// Constructs an appropriate FormatException for the given existing exception
    /// when trying to parse a float or double.
    /// </summary>
    private FormatException CreateFloatParseException(Exception e) {
      return CreateFormatException("Couldn't parse number: " + e.Message);
    }
  }
}