First cut at new layout

author: Jon Skeet <skeet@pobox.com> 2008-10-22 13:30:34 +0100
committer: Jon Skeet <skeet@pobox.com> 2008-10-22 13:30:34 +0100
commit: 6803686bc06c4d96afd9bd2637f7b37a58596699 (patch)
tree: 4b21c563f4cd4e399fbc0b253bc2f15e822eae88 /src/ProtocolBuffers/TextFormat.cs
parent: f0589506c96600dcd01319b9d1929d87505f3daa (diff)
download: protobuf-6803686bc06c4d96afd9bd2637f7b37a58596699.tar.gz
protobuf-6803686bc06c4d96afd9bd2637f7b37a58596699.tar.bz2
protobuf-6803686bc06c4d96afd9bd2637f7b37a58596699.zip
1 files changed, 620 insertions, 0 deletions
diff --git a/src/ProtocolBuffers/TextFormat.cs b/src/ProtocolBuffers/TextFormat.cs
new file mode 100644
index 00000000..d487bd61
--- /dev/null
+++ b/src/ProtocolBuffers/TextFormat.cs
@@ -0,0 +1,620 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.
+// http://code.google.com/p/protobuf/
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.IO;
+using System.Text;
+using Google.ProtocolBuffers.Descriptors;
+using System.Collections;
+
+namespace Google.ProtocolBuffers {
+  /// <summary>
+  /// Provides ASCII text format support for messages: printing (the Print and
+  /// PrintToString methods) and parsing (the Merge methods).
+  /// </summary>
+  public static class TextFormat {
+
+    /// <summary>
+    /// Writes the text-format representation of <paramref name="message"/> to
+    /// <paramref name="output"/>.
+    /// </summary>
+    /// <param name="message">The message whose fields are printed.</param>
+    /// <param name="output">The writer that receives the text.</param>
+    public static void Print(IMessage message, TextWriter output) {
+      // Wrap the writer in a generator and hand over to the private overload.
+      Print(message, new TextGenerator(output));
+    }
+
+    /// <summary>
+    /// Writes the text-format representation of the unknown fields in
+    /// <paramref name="fields" /> to <paramref name="output"/>.
+    /// </summary>
+    /// <param name="fields">The unknown field set to print.</param>
+    /// <param name="output">The writer that receives the text.</param>
+    public static void Print(UnknownFieldSet fields, TextWriter output) {
+      PrintUnknownFields(fields, new TextGenerator(output));
+    }
+
+    /// <summary>
+    /// Returns the text-format representation of <paramref name="message"/> as a string.
+    /// </summary>
+    /// <param name="message">The message to print.</param>
+    /// <returns>Everything Print wrote for the message.</returns>
+    public static string PrintToString(IMessage message) {
+      StringWriter buffer = new StringWriter();
+      Print(message, buffer);
+      string printed = buffer.ToString();
+      return printed;
+    }
+
+    /// <summary>
+    /// Returns the text-format representation of the unknown fields in
+    /// <paramref name="fields"/> as a string.
+    /// </summary>
+    /// <param name="fields">The unknown field set to print.</param>
+    /// <returns>Everything Print wrote for the field set.</returns>
+    public static string PrintToString(UnknownFieldSet fields) {
+      StringWriter buffer = new StringWriter();
+      Print(fields, buffer);
+      string printed = buffer.ToString();
+      return printed;
+    }
+
+    /// <summary>
+    /// Prints every entry of the message's AllFields collection, then the
+    /// message's unknown fields, using <paramref name="generator"/>.
+    /// </summary>
+    private static void Print(IMessage message, TextGenerator generator) {
+      foreach (KeyValuePair<FieldDescriptor, object> pair in message.AllFields) {
+        FieldDescriptor descriptor = pair.Key;
+        object fieldValue = pair.Value;
+        PrintField(descriptor, fieldValue, generator);
+      }
+      // Unknown fields always come after the known ones.
+      PrintUnknownFields(message.UnknownFields, generator);
+    }
+
+    /// <summary>
+    /// Prints a field: a single entry for a non-repeated field, or one entry
+    /// per element for a repeated field.
+    /// </summary>
+    /// <param name="field">Descriptor of the field being printed.</param>
+    /// <param name="value">The field value; enumerated as an IEnumerable when the field is repeated.</param>
+    /// <param name="generator">Generator receiving the output.</param>
+    internal static void PrintField(FieldDescriptor field, object value, TextGenerator generator) {
+      if (!field.IsRepeated) {
+        PrintSingleField(field, value, generator);
+        return;
+      }
+
+      // Repeated field: each element gets its own "name: value" entry.
+      IEnumerable elements = (IEnumerable) value;
+      foreach (object element in elements) {
+        PrintSingleField(field, element, generator);
+      }
+    }
+
+    /// <summary>
+    /// Prints one field value (a single element when the field is repeated) as a
+    /// complete entry: the field name, a separator, the value and a trailing newline.
+    /// Message-typed values are wrapped in braces and indented; all other values
+    /// are written after ": ".
+    /// </summary>
+    private static void PrintSingleField(FieldDescriptor field, Object value, TextGenerator generator) {
+      if (!field.IsExtension) {
+        // Groups must be serialized with their original capitalization, which is
+        // taken from the group's message type rather than from the field name.
+        bool isGroup = field.FieldType == FieldType.Group;
+        generator.Print(isGroup ? field.MessageType.Name : field.Name);
+      } else {
+        // Extensions are written inside square brackets.
+        generator.Print("[");
+        // We special-case MessageSet elements for compatibility with proto1: an
+        // optional message extension of a MessageSet container whose extension
+        // scope is its own message type is named after that message type.
+        bool isMessageSetElement = field.ContainingType.Options.MessageSetWireFormat
+            && field.FieldType == FieldType.Message
+            && field.IsOptional
+            && field.ExtensionScope == field.MessageType;
+        generator.Print(isMessageSetElement ? field.MessageType.FullName : field.FullName);
+        generator.Print("]");
+      }
+
+      bool isMessage = field.MappedType == MappedType.Message;
+      if (isMessage) {
+        generator.Print(" {\n");
+        generator.Indent();
+      } else {
+        generator.Print(": ");
+      }
+
+      PrintFieldValue(field, value, generator);
+
+      if (isMessage) {
+        // Close the block opened above before terminating the line.
+        generator.Outdent();
+        generator.Print("}");
+      }
+      generator.Print("\n");
+    }
+
+    /// <summary>
+    /// Prints just the value of a field (no name, separator or newline), choosing
+    /// the representation from the field's FieldType. Field types not listed in
+    /// the switch produce no output.
+    /// </summary>
+    private static void PrintFieldValue(FieldDescriptor field, object value, TextGenerator generator) {
+      switch (field.FieldType) {
+        case FieldType.Message:
+        case FieldType.Group:
+          // Nested messages are printed recursively.
+          Print((IMessage) value, generator);
+          break;
+
+        case FieldType.Enum:
+          EnumValueDescriptor enumValue = (EnumValueDescriptor) value;
+          generator.Print(enumValue.Name);
+          break;
+
+        case FieldType.Bytes:
+          generator.Print("\"");
+          generator.Print(EscapeBytes((ByteString) value));
+          generator.Print("\"");
+          break;
+
+        case FieldType.String:
+          generator.Print("\"");
+          generator.Print(EscapeText((string) value));
+          generator.Print("\"");
+          break;
+
+        case FieldType.Bool:
+          // Explicitly use the Java-style lower-case true/false.
+          bool flag = (bool) value;
+          generator.Print(flag ? "true" : "false");
+          break;
+
+        case FieldType.Double:
+        case FieldType.Float:
+        case FieldType.Int32:
+        case FieldType.Int64:
+        case FieldType.SInt32:
+        case FieldType.SInt64:
+        case FieldType.SFixed32:
+        case FieldType.SFixed64:
+        case FieldType.UInt32:
+        case FieldType.UInt64:
+        case FieldType.Fixed32:
+        case FieldType.Fixed64:
+          // A plain ToString() would format with the current culture; the
+          // invariant culture keeps the output predictable.
+          IConvertible number = (IConvertible) value;
+          generator.Print(number.ToString(CultureInfo.InvariantCulture));
+          break;
+      }
+    }
+
+    /// <summary>
+    /// Prints every unknown field in <paramref name="unknownFields"/>, keyed by
+    /// field number. For each field the varint values are printed first (decimal),
+    /// then fixed32 values (0x + 8 hex digits), fixed64 values (0x + 16 hex digits),
+    /// length-delimited values (escaped, in double quotes) and finally groups
+    /// (recursively, inside an indented brace block).
+    /// </summary>
+    /// <param name="unknownFields">The unknown field set to print.</param>
+    /// <param name="generator">Generator receiving the output.</param>
+    private static void PrintUnknownFields(UnknownFieldSet unknownFields, TextGenerator generator) {
+      foreach (KeyValuePair<int, UnknownField> entry in unknownFields.FieldDictionary) {
+        // Format the field number once, with the invariant culture, so the output
+        // is as predictable as the values printed for known fields.
+        string number = entry.Key.ToString(CultureInfo.InvariantCulture);
+        string prefix = number + ": ";
+        UnknownField field = entry.Value;
+
+        foreach (ulong value in field.VarintList) {
+          generator.Print(prefix);
+          generator.Print(value.ToString(CultureInfo.InvariantCulture));
+          generator.Print("\n");
+        }
+        foreach (uint value in field.Fixed32List) {
+          generator.Print(prefix);
+          generator.Print(string.Format(CultureInfo.InvariantCulture, "0x{0:x8}", value));
+          generator.Print("\n");
+        }
+        foreach (ulong value in field.Fixed64List) {
+          generator.Print(prefix);
+          generator.Print(string.Format(CultureInfo.InvariantCulture, "0x{0:x16}", value));
+          generator.Print("\n");
+        }
+        foreach (ByteString value in field.LengthDelimitedList) {
+          generator.Print(prefix);
+          generator.Print("\"");
+          generator.Print(EscapeBytes(value));
+          generator.Print("\"\n");
+        }
+        foreach (UnknownFieldSet value in field.GroupList) {
+          // Groups have no ':' separator; their contents go in an indented block.
+          generator.Print(number);
+          generator.Print(" {\n");
+          generator.Indent();
+          PrintUnknownFields(value, generator);
+          generator.Outdent();
+          generator.Print("}\n");
+        }
+      }
+    }
+
+    /// <summary>
+    /// Parses an unsigned 64-bit integer (decimal, 0x hex or leading-0 octal).
+    /// </summary>
+    internal static ulong ParseUInt64(string text) {
+      // ParseInteger hands back the 64-bit pattern as a long; reinterpret it.
+      long bits = ParseInteger(text, false, true);
+      return (ulong) bits;
+    }
+
+    /// <summary>
+    /// Parses a signed 64-bit integer (decimal, 0x hex or leading-0 octal,
+    /// optionally preceded by '-').
+    /// </summary>
+    internal static long ParseInt64(string text) {
+      long parsed = ParseInteger(text, true, true);
+      return parsed;
+    }
+
+    /// <summary>
+    /// Parses an unsigned 32-bit integer (decimal, 0x hex or leading-0 octal).
+    /// </summary>
+    internal static uint ParseUInt32(string text) {
+      // ParseInteger has already range-checked the value against uint.MaxValue.
+      long parsed = ParseInteger(text, false, false);
+      return (uint) parsed;
+    }
+
+    /// <summary>
+    /// Parses a signed 32-bit integer (decimal, 0x hex or leading-0 octal,
+    /// optionally preceded by '-').
+    /// </summary>
+    internal static int ParseInt32(string text) {
+      // ParseInteger has already range-checked the value against the int range.
+      long parsed = ParseInteger(text, true, false);
+      return (int) parsed;
+    }
+
+    /// <summary>
+    /// Parses an integer in hex (leading 0x), decimal (no prefix) or octal (leading 0).
+    /// Only a negative sign is permitted, and it must come before the radix indicator.
+    /// </summary>
+    /// <param name="text">The text to parse, for example "42", "-0x1f" or "017".</param>
+    /// <param name="isSigned">True when the target type is signed; a leading '-' is rejected otherwise.</param>
+    /// <param name="isLong">True for a 64-bit target type, false for a 32-bit one.</param>
+    /// <returns>
+    /// The parsed value. For unsigned 64-bit targets the raw 64-bit pattern is
+    /// returned, so values above long.MaxValue come back negative and the caller
+    /// casts them back to ulong.
+    /// </returns>
+    /// <exception cref="FormatException">
+    /// The text is not a well-formed number, or the number does not fit in the
+    /// requested type. This is the only exception type thrown for bad input.
+    /// </exception>
+    private static long ParseInteger(string text, bool isSigned, bool isLong) {
+      string original = text;
+      bool negative = false;
+      // Ordinal comparisons: the prefixes are fixed ASCII markers, not linguistic text.
+      if (text.StartsWith("-", StringComparison.Ordinal)) {
+        if (!isSigned) {
+          throw new FormatException("Number must be positive: " + original);
+        }
+        negative = true;
+        text = text.Substring(1);
+      }
+
+      int radix = 10;
+      if (text.StartsWith("0x", StringComparison.Ordinal)) {
+        radix = 16;
+        text = text.Substring(2);
+      } else if (text.StartsWith("0", StringComparison.Ordinal)) {
+        // The leading zero is left in place; it is a valid octal digit.
+        radix = 8;
+      }
+
+      ulong result;
+      try {
+        // Workaround for https://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=278448
+        // We should be able to use Convert.ToUInt64 for all cases.
+        // NumberStyles.None accepts digits only, so a second sign ("--1", "+1")
+        // or surrounding whitespace is rejected regardless of the current culture.
+        result = radix == 10
+            ? ulong.Parse(text, NumberStyles.None, CultureInfo.InvariantCulture)
+            : Convert.ToUInt64(text, radix);
+      } catch (OverflowException) {
+        // Convert OverflowException to FormatException so there's a single exception type this method can throw.
+        throw CreateOutOfRangeException(original, isSigned, isLong);
+      } catch (ArgumentException) {
+        // Convert.ToUInt64 reports an empty digit string ("0x") and an embedded
+        // minus sign ("0x-1") with ArgumentException-derived types; keep the
+        // single-exception-type contract for those too.
+        throw new FormatException("Invalid number: " + original);
+      }
+
+      if (negative) {
+        // The magnitude of the most negative value is one more than the largest positive one.
+        ulong max = isLong ? 0x8000000000000000UL : 0x80000000UL;
+        if (result > max) {
+          throw CreateOutOfRangeException(original, isSigned, isLong);
+        }
+        // unchecked: for long.MinValue the cast and the negation both wrap on
+        // purpose, and must do so even when the assembly is compiled with /checked.
+        return unchecked(-(long) result);
+      } else {
+        ulong max = isSigned
+            ? (isLong ? (ulong) long.MaxValue : int.MaxValue)
+            : (isLong ? ulong.MaxValue : uint.MaxValue);
+        if (result > max) {
+          throw CreateOutOfRangeException(original, isSigned, isLong);
+        }
+        // unchecked: unsigned 64-bit values above long.MaxValue are returned as
+        // their bit pattern.
+        return unchecked((long) result);
+      }
+    }
+
+    /// <summary>
+    /// Builds the exception reported when a number does not fit in the requested type.
+    /// </summary>
+    private static FormatException CreateOutOfRangeException(string original, bool isSigned, bool isLong) {
+      string numberDescription = string.Format("{0}-bit {1}signed integer", isLong ? 64 : 32, isSigned ? "" : "un");
+      return new FormatException("Number out of range for " + numberDescription + ": " + original);
+    }
+
+    /// <summary>
+    /// Returns true when <paramref name="c"/> is one of the octal digits '0' to '7'.
+    /// </summary>
+    private static bool IsOctal(char c) {
+      bool atLeastZero = c >= '0';
+      bool atMostSeven = c <= '7';
+      return atLeastZero && atMostSeven;
+    }
+
+    /// <summary>
+    /// Returns true when <paramref name="c"/> is a hexadecimal digit:
+    /// '0'-'9', 'a'-'f' or 'A'-'F'.
+    /// </summary>
+    private static bool IsHex(char c) {
+      bool isDecimalDigit = c >= '0' && c <= '9';
+      bool isLowerHexLetter = c >= 'a' && c <= 'f';
+      bool isUpperHexLetter = c >= 'A' && c <= 'F';
+      return isDecimalDigit || isLowerHexLetter || isUpperHexLetter;
+    }
+
+    /// <summary>
+    /// Interprets a character as a digit (in any base up to 36) and returns the
+    /// numeric value: '0'-'9' map to 0-9 and letters of either case map to 10-35.
+    /// No validation is performed; every character that is not a decimal digit
+    /// or a lower-case letter is treated as an offset from 'A'.
+    /// </summary>
+    private static int ParseDigit(char c) {
+      if (c >= '0' && c <= '9') {
+        return c - '0';
+      }
+      if (c >= 'a' && c <= 'z') {
+        return 10 + (c - 'a');
+      }
+      // Upper-case letters, and unvalidated fall-through for anything else.
+      return 10 + (c - 'A');
+    }
+
+    /// <summary>
+    /// Unescapes a text string as escaped using <see cref="EscapeText(string)" />.
+    /// Two-digit hex escapes (starting with "\x") are also recognised.
+    /// The unescaped bytes are decoded as UTF-8.
+    /// </summary>
+    internal static string UnescapeText(string input) {
+      ByteString rawBytes = UnescapeBytes(input);
+      return rawBytes.ToStringUtf8();
+    }
+
+    /// <summary>
+    /// Like <see cref="EscapeBytes" /> but escapes a text string.
+    /// The string is first encoded as UTF-8, then each byte is escaped individually.
+    /// The returned value is guaranteed to be entirely ASCII.
+    /// </summary>
+    internal static string EscapeText(string input) {
+      ByteString utf8Bytes = ByteString.CopyFromUtf8(input);
+      return EscapeBytes(utf8Bytes);
+    }
+
+    /// <summary>
+    /// Escapes bytes in the format used in protocol buffer text format, which
+    /// is the same as the format used for C string literals.  All bytes
+    /// that are not printable 7-bit ASCII characters are escaped, as well as
+    /// backslash, single-quote, and double-quote characters.  Bytes that have
+    /// no short-hand escape sequence are written as 3-digit octal sequences.
+    /// The returned value is guaranteed to be entirely ASCII.
+    /// </summary>
+    internal static String EscapeBytes(ByteString input) {
+      StringBuilder escaped = new StringBuilder(input.Length);
+      foreach (byte current in input) {
+        switch (current) {
+          // Bell and vertical tab are matched by their numeric values.
+          case 0x07:
+            escaped.Append("\\a");
+            break;
+          case 0x0b:
+            escaped.Append("\\v");
+            break;
+          case (byte) '\b':
+            escaped.Append("\\b");
+            break;
+          case (byte) '\f':
+            escaped.Append("\\f");
+            break;
+          case (byte) '\n':
+            escaped.Append("\\n");
+            break;
+          case (byte) '\r':
+            escaped.Append("\\r");
+            break;
+          case (byte) '\t':
+            escaped.Append("\\t");
+            break;
+          case (byte) '\\':
+            escaped.Append("\\\\");
+            break;
+          case (byte) '\'':
+            escaped.Append("\\\'");
+            break;
+          case (byte) '"':
+            escaped.Append("\\\"");
+            break;
+          default:
+            if (current < 0x20 || current >= 128) {
+              // Not printable 7-bit ASCII: backslash followed by three octal digits.
+              escaped.Append('\\')
+                  .Append((char) ('0' + ((current >> 6) & 3)))
+                  .Append((char) ('0' + ((current >> 3) & 7)))
+                  .Append((char) ('0' + (current & 7)));
+            } else {
+              escaped.Append((char) current);
+            }
+            break;
+        }
+      }
+      return escaped.ToString();
+    }
+
+    /// <summary>
+    /// Performs string unescaping from C style (octal, hex, form feeds, tab etc) into a byte string.
+    /// Octal escapes take one to three digits and hex escapes ("\x") one or two digits.
+    /// </summary>
+    /// <exception cref="FormatException">
+    /// The input contains a character outside the range 32-127 where a literal
+    /// character or a backslash is expected, ends with a lone backslash, has "\x"
+    /// with no hex digit after it, or uses an unrecognised escape character.
+    /// </exception>
+    internal static ByteString UnescapeBytes(string input) {
+      // Every escape sequence shrinks, so the input length is an upper bound.
+      byte[] buffer = new byte[input.Length];
+      int count = 0;
+      // Index of the next unread character.
+      int next = 0;
+      while (next < input.Length) {
+        char c = input[next++];
+        if (c < 32 || c > 127) {
+          throw new FormatException("Escaped string must only contain ASCII");
+        }
+        if (c != '\\') {
+          buffer[count++] = (byte) c;
+          continue;
+        }
+        if (next >= input.Length) {
+          throw new FormatException("Invalid escape sequence: '\\' at end of string.");
+        }
+
+        // The character that selects the kind of escape.
+        c = input[next++];
+        if (IsOctal(c)) {
+          // Octal escape: this digit plus at most two more.
+          int octal = ParseDigit(c);
+          for (int extra = 0; extra < 2 && next < input.Length && IsOctal(input[next]); extra++) {
+            octal = octal * 8 + ParseDigit(input[next++]);
+          }
+          buffer[count++] = (byte) octal;
+          continue;
+        }
+
+        switch (c) {
+          case 'a': buffer[count++] = 0x07; break;
+          case 'b': buffer[count++] = (byte) '\b'; break;
+          case 'f': buffer[count++] = (byte) '\f'; break;
+          case 'n': buffer[count++] = (byte) '\n'; break;
+          case 'r': buffer[count++] = (byte) '\r'; break;
+          case 't': buffer[count++] = (byte) '\t'; break;
+          case 'v': buffer[count++] = 0x0b; break;
+          case '\\': buffer[count++] = (byte) '\\'; break;
+          case '\'': buffer[count++] = (byte) '\''; break;
+          case '"': buffer[count++] = (byte) '\"'; break;
+
+          case 'x':
+            // Hex escape: one mandatory digit and one optional digit.
+            if (next >= input.Length || !IsHex(input[next])) {
+              throw new FormatException("Invalid escape sequence: '\\x' with no digits");
+            }
+            int hex = ParseDigit(input[next++]);
+            if (next < input.Length && IsHex(input[next])) {
+              hex = hex * 16 + ParseDigit(input[next++]);
+            }
+            buffer[count++] = (byte) hex;
+            break;
+
+          default:
+            throw new FormatException("Invalid escape sequence: '\\" + c + "'");
+        }
+      }
+
+      // Only the first `count` bytes of the buffer were filled in.
+      return ByteString.CopyFrom(buffer, 0, count);
+    }
+
+    /// <summary>
+    /// Parses <paramref name="text"/> as text format and merges the fields into
+    /// <paramref name="builder"/>, using the empty extension registry.
+    /// </summary>
+    public static void Merge(string text, IBuilder builder) {
+      ExtensionRegistry noExtensions = ExtensionRegistry.Empty;
+      Merge(text, noExtensions, builder);
+    }
+
+    /// <summary>
+    /// Reads text format from <paramref name="reader"/> and merges the fields into
+    /// <paramref name="builder"/>, using the empty extension registry.
+    /// </summary>
+    public static void Merge(TextReader reader, IBuilder builder) {
+      ExtensionRegistry noExtensions = ExtensionRegistry.Empty;
+      Merge(reader, noExtensions, builder);
+    }
+
+    /// <summary>
+    /// Reads all remaining text from <paramref name="reader"/>, parses it as text
+    /// format and merges the fields into <paramref name="builder"/>.
+    /// </summary>
+    /// <param name="reader">Source of the text; it is read to the end before parsing starts.</param>
+    /// <param name="registry">Registry used to resolve extension names.</param>
+    /// <param name="builder">Builder that receives the parsed fields.</param>
+    public static void Merge(TextReader reader, ExtensionRegistry registry, IBuilder builder) {
+      string text = reader.ReadToEnd();
+      Merge(text, registry, builder);
+    }
+
+    /// <summary>
+    /// Parses <paramref name="text"/> as text format and merges the fields, one
+    /// at a time, into <paramref name="builder"/>.
+    /// </summary>
+    /// <param name="text">The text to parse.</param>
+    /// <param name="registry">Registry used to resolve extension names.</param>
+    /// <param name="builder">Builder that receives the parsed fields.</param>
+    public static void Merge(string text, ExtensionRegistry registry, IBuilder builder) {
+      // Keep merging fields until the tokenizer reports the end of the input.
+      for (TextTokenizer tokens = new TextTokenizer(text); !tokens.AtEnd; ) {
+        MergeField(tokens, registry, builder);
+      }
+    }
+
+    /// <summary>
+    /// Parses a single field from the specified tokenizer and merges it into
+    /// the builder. The field is either an extension written as "[full.name]"
+    /// or a regular field identified by name. Message-typed values are parsed
+    /// recursively from a "{ ... }" or "&lt; ... &gt;" block; all other values
+    /// follow a mandatory ":".
+    /// </summary>
+    /// <param name="tokenizer">Token source; this method consumes exactly one field entry from it.</param>
+    /// <param name="extensionRegistry">Registry in which bracketed extension names are looked up.</param>
+    /// <param name="builder">
+    /// Builder receiving the value. Its DescriptorForType decides which field and
+    /// extension names are acceptable.
+    /// </param>
+    private static void MergeField(TextTokenizer tokenizer, ExtensionRegistry extensionRegistry,
+        IBuilder builder) {
+
+      FieldDescriptor field;
+      MessageDescriptor type = builder.DescriptorForType;
+      // Stays null for regular fields; set only when the entry names an extension.
+      ExtensionInfo extension = null;
+
+      if (tokenizer.TryConsume("[")) {
+        // An extension.
+        // The name is a dot-separated sequence of identifiers; rebuild it token by token.
+        StringBuilder name = new StringBuilder(tokenizer.ConsumeIdentifier());
+        while (tokenizer.TryConsume(".")) {
+          name.Append(".");
+          name.Append(tokenizer.ConsumeIdentifier());
+        }
+
+        extension = extensionRegistry[name.ToString()];
+
+        // Both checks run before "]" is consumed, so the errors are raised while
+        // the last identifier of the name is still the previous token.
+        if (extension == null) {
+          throw tokenizer.CreateFormatExceptionPreviousToken("Extension \"" + name + "\" not found in the ExtensionRegistry.");
+        } else if (extension.Descriptor.ContainingType != type) {
+          throw tokenizer.CreateFormatExceptionPreviousToken("Extension \"" + name + "\" does not extend message type \"" +
+            type.FullName + "\".");
+        }
+
+        tokenizer.Consume("]");
+
+        field = extension.Descriptor;
+      } else {
+        String name = tokenizer.ConsumeIdentifier();
+        field = type.FindDescriptor<FieldDescriptor>(name);
+
+        // Group names are expected to be capitalized as they appear in the
+        // .proto file, which actually matches their type names, not their field
+        // names.
+        if (field == null) {
+          // Explicitly specify the invariant culture so that this code does not break when
+          // executing in Turkey.
+          String lowerName = name.ToLowerInvariant();
+          field = type.FindDescriptor<FieldDescriptor>(lowerName);
+          // If the case-insensitive match worked but the field is NOT a group,
+          // discard it: only group fields are looked up by lower-cased name.
+          if (field != null && field.FieldType != FieldType.Group) {
+            field = null;
+          }
+        }
+        // Again, special-case group names as described above.
+        // A group field is accepted only when the text spells its message type
+        // name exactly; any other spelling is treated as an unknown field.
+        if (field != null && field.FieldType == FieldType.Group && field.MessageType.Name != name) {
+          field = null;
+        }
+
+        if (field == null) {
+          throw tokenizer.CreateFormatExceptionPreviousToken(
+              "Message type \"" + type.FullName + "\" has no field named \"" + name + "\".");
+        }
+      }
+
+      // Filled in by exactly one of the two branches below.
+      object value = null;
+
+      if (field.MappedType == MappedType.Message) {
+        tokenizer.TryConsume(":");  // optional
+
+        // A nested message may be delimited by either <...> or {...}; remember
+        // which closing token matches the opener that was used.
+        String endToken;
+        if (tokenizer.TryConsume("<")) {
+          endToken = ">";
+        } else {
+          tokenizer.Consume("{");
+          endToken = "}";
+        }
+
+        // Regular fields get their sub-builder from the parent builder;
+        // extensions get theirs from the extension's default instance.
+        IBuilder subBuilder;
+        if (extension == null) {
+          subBuilder = builder.CreateBuilderForField(field);
+        } else {
+          subBuilder = extension.DefaultInstance.WeakCreateBuilderForType();
+        }
+
+        // Merge nested fields until the closing token is consumed; running out
+        // of input first is reported as a missing end token.
+        while (!tokenizer.TryConsume(endToken)) {
+          if (tokenizer.AtEnd) {
+            throw tokenizer.CreateFormatException("Expected \"" + endToken + "\".");
+          }
+          MergeField(tokenizer, extensionRegistry, subBuilder);
+        }
+
+        value = subBuilder.WeakBuild();
+
+      } else {
+        // For non-message values the ":" separator is mandatory.
+        tokenizer.Consume(":");
+
+        // Pick the tokenizer method from the declared field type.
+        switch (field.FieldType) {
+          case FieldType.Int32:
+          case FieldType.SInt32:
+          case FieldType.SFixed32:
+            value = tokenizer.ConsumeInt32();
+            break;
+
+          case FieldType.Int64:
+          case FieldType.SInt64:
+          case FieldType.SFixed64:
+            value = tokenizer.ConsumeInt64();
+            break;
+
+          case FieldType.UInt32:
+          case FieldType.Fixed32:
+            value = tokenizer.ConsumeUInt32();
+            break;
+
+          case FieldType.UInt64:
+          case FieldType.Fixed64:
+            value = tokenizer.ConsumeUInt64();
+            break;
+
+          case FieldType.Float:
+            value = tokenizer.ConsumeFloat();
+            break;
+
+          case FieldType.Double:
+            value = tokenizer.ConsumeDouble();
+            break;
+
+          case FieldType.Bool:
+            value = tokenizer.ConsumeBoolean();
+            break;
+
+          case FieldType.String:
+            value = tokenizer.ConsumeString();
+            break;
+
+          case FieldType.Bytes:
+            value = tokenizer.ConsumeByteString();
+            break;
+
+          case FieldType.Enum: {
+            EnumDescriptor enumType = field.EnumType;
+
+            // Enum values may be given either by number or by name.
+            if (tokenizer.LookingAtInteger()) {
+              int number = tokenizer.ConsumeInt32();
+              value = enumType.FindValueByNumber(number);
+              if (value == null) {
+                throw tokenizer.CreateFormatExceptionPreviousToken(
+                  "Enum type \"" + enumType.FullName +
+                  "\" has no value with number " + number + ".");
+              }
+            } else {
+              String id = tokenizer.ConsumeIdentifier();
+              value = enumType.FindValueByName(id);
+              if (value == null) {
+                throw tokenizer.CreateFormatExceptionPreviousToken(
+                  "Enum type \"" + enumType.FullName +
+                  "\" has no value named \"" + id + "\".");
+              }
+            }
+
+            break;
+          }
+
+          case FieldType.Message:
+          case FieldType.Group:
+            // Presumably these always have MappedType.Message and so take the
+            // branch above - TODO confirm against FieldDescriptor.
+            throw new InvalidOperationException("Can't get here.");
+        }
+      }
+
+      // Repeated fields accumulate values; singular fields are overwritten.
+      if (field.IsRepeated) {
+        builder.WeakAddRepeatedField(field, value);
+      } else {
+        builder.SetField(field, value);
+      }
+    }
+  }
+}
author	Jon Skeet <skeet@pobox.com>	2008-10-22 13:30:34 +0100
committer	Jon Skeet <skeet@pobox.com>	2008-10-22 13:30:34 +0100
commit	6803686bc06c4d96afd9bd2637f7b37a58596699 (patch)
tree	4b21c563f4cd4e399fbc0b253bc2f15e822eae88 /src/ProtocolBuffers/TextFormat.cs
parent	f0589506c96600dcd01319b9d1929d87505f3daa (diff)
download	protobuf-6803686bc06c4d96afd9bd2637f7b37a58596699.tar.gz protobuf-6803686bc06c4d96afd9bd2637f7b37a58596699.tar.bz2 protobuf-6803686bc06c4d96afd9bd2637f7b37a58596699.zip