aboutsummaryrefslogblamecommitdiff
path: root/csharp/ProtocolBuffers/TextFormat.cs
blob: 48b2d8af26affa65405dcbd31abf51511b4d2cf2 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16















                                                                           
                                 
                           
                
                  

                                         

                                  

















                                                                                                    




                                                                         
                                                          











                                                                          





































































                                                                                                        


                                                                                         




























                                                              
     
 





                                                                                                    
                                  



                                                   

                                                            


                                                    

                                                             
















                                                                 
     
 
                                                    
                                                     

     
                                                  
                                            

     
                                                   
                                                     

     
                                                 
                                                   

     




















                                                                                        

       






                                                                                                                 

                                                                                                                      
       

                     
                                                                
                           

                                                                                                      






                                                             

                                                                                                                        



































                                                                                

                                                                                   
                  
                                                       



                                                 



                                                                                  
                                                     










                                                                              
                                                          














                                                              
                                       















                                                                                                     
                                                            
































































                                                                                           
     

                                                             
                                                    


                                                                   




                                                                                               


                                                                                         




                                                        

     



















































































































                                                                                                                               
                                             




















































                                                                   
     

   
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.
// http://code.google.com/p/protobuf/
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Text;
using Google.ProtocolBuffers.Descriptors;
using System.Collections;

namespace Google.ProtocolBuffers {
  /// <summary>
  /// Provides ASCII text formatting support for messages: printing via the
  /// <c>Print</c> and <c>PrintToString</c> methods, and parsing via the
  /// <c>Merge</c> methods.
  /// </summary>
  public static class TextFormat {

    /// <summary>
    /// Writes the text-format representation of <paramref name="message"/>
    /// to <paramref name="output"/>.
    /// </summary>
    /// <param name="message">The message whose fields are printed.</param>
    /// <param name="output">The writer that receives the generated text.</param>
    public static void Print(IMessage message, TextWriter output) {
      // Wrap the writer in a generator and delegate to the generator-based overload.
      Print(message, new TextGenerator(output));
    }

    /// <summary>
    /// Writes the text-format representation of the unknown field set
    /// <paramref name="fields" /> to <paramref name="output"/>.
    /// </summary>
    /// <param name="fields">The unknown fields to print.</param>
    /// <param name="output">The writer that receives the generated text.</param>
    public static void Print(UnknownFieldSet fields, TextWriter output) {
      PrintUnknownFields(fields, new TextGenerator(output));
    }

    /// <summary>
    /// Returns the text-format representation of <paramref name="message"/> as a string.
    /// </summary>
    public static string PrintToString(IMessage message) {
      TextWriter buffer = new StringWriter();
      Print(message, buffer);
      return buffer.ToString();
    }

    /// <summary>
    /// Returns the text-format representation of the unknown field set
    /// <paramref name="fields"/> as a string.
    /// </summary>
    public static string PrintToString(UnknownFieldSet fields) {
      TextWriter buffer = new StringWriter();
      Print(fields, buffer);
      return buffer.ToString();
    }

    /// <summary>
    /// Prints every field reported by <c>message.AllFields</c>, followed by the
    /// message's unknown fields, to <paramref name="generator"/>.
    /// </summary>
    private static void Print(IMessage message, TextGenerator generator) {
      foreach (KeyValuePair<FieldDescriptor, object> pair in message.AllFields) {
        FieldDescriptor descriptor = pair.Key;
        object fieldValue = pair.Value;
        PrintField(descriptor, fieldValue, generator);
      }
      PrintUnknownFields(message.UnknownFields, generator);
    }

    /// <summary>
    /// Prints a field to <paramref name="generator"/>. A repeated field is
    /// printed as one entry per element; any other field as a single entry.
    /// </summary>
    internal static void PrintField(FieldDescriptor field, object value, TextGenerator generator) {
      if (!field.IsRepeated) {
        PrintSingleField(field, value, generator);
        return;
      }

      // Repeated field: the value is enumerated and each element printed on its own.
      foreach (object item in (IEnumerable) value) {
        PrintSingleField(field, item, generator);
      }
    }

    /// <summary>
    /// Prints a single field occurrence: the field's label, then either
    /// ": value" for non-message fields or a braced, indented block for
    /// message-typed fields, and finally a newline.
    /// </summary>
    private static void PrintSingleField(FieldDescriptor field, Object value, TextGenerator generator) {
      if (field.IsExtension) {
        // Extensions are labelled with a name wrapped in square brackets.
        generator.Print("[");
        // We special-case MessageSet elements for compatibility with proto1:
        // an optional message-typed extension of a MessageSet container whose
        // extension scope is its own message type is labelled with the
        // message type's full name rather than the field's.
        // NOTE(review): the last comparison is presumably intended as
        // reference ("object") equality between descriptors - confirm.
        bool isMessageSetElement =
            field.ContainingType.Options.MessageSetWireFormat
            && field.FieldType == FieldType.Message
            && field.IsOptional
            && field.ExtensionScope == field.MessageType;
        generator.Print(isMessageSetElement ? field.MessageType.FullName : field.FullName);
        generator.Print("]");
      } else {
        // Groups must be serialized with their original capitalization, so
        // the message type's name is printed instead of the field name.
        bool isGroup = field.FieldType == FieldType.Group;
        generator.Print(isGroup ? field.MessageType.Name : field.Name);
      }

      bool isMessage = field.MappedType == MappedType.Message;
      generator.Print(isMessage ? " {\n" : ": ");
      if (isMessage) {
        generator.Indent();
      }

      PrintFieldValue(field, value, generator);

      if (isMessage) {
        generator.Outdent();
        generator.Print("}");
      }
      generator.Print("\n");
    }

    /// <summary>
    /// Prints just the value of a field (no label, no trailing newline),
    /// formatted according to <c>field.FieldType</c>. Field types not listed
    /// in the switch print nothing.
    /// </summary>
    private static void PrintFieldValue(FieldDescriptor field, object value, TextGenerator generator) {
      switch (field.FieldType) {
        case FieldType.Message:
        case FieldType.Group: {
          // Nested messages are printed field by field.
          Print((IMessage) value, generator);
          break;
        }

        case FieldType.Enum: {
          EnumValueDescriptor enumValue = (EnumValueDescriptor) value;
          generator.Print(enumValue.Name);
          break;
        }

        case FieldType.Bool: {
          // Explicitly use the lower-case Java spelling of true/false.
          bool flag = (bool) value;
          generator.Print(flag ? "true" : "false");
          break;
        }

        case FieldType.String: {
          generator.Print("\"");
          string escapedText = EscapeText((string) value);
          generator.Print(escapedText);
          generator.Print("\"");
          break;
        }

        case FieldType.Bytes: {
          generator.Print("\"");
          string escapedBytes = EscapeBytes((ByteString) value);
          generator.Print(escapedBytes);
          generator.Print("\"");
          break;
        }

        case FieldType.Double:
        case FieldType.Float:
        case FieldType.Int32:
        case FieldType.Int64:
        case FieldType.UInt32:
        case FieldType.UInt64:
        case FieldType.SInt32:
        case FieldType.SInt64:
        case FieldType.Fixed32:
        case FieldType.Fixed64:
        case FieldType.SFixed32:
        case FieldType.SFixed64: {
          // Plain Object.ToString would format using the current culture;
          // the invariant culture keeps the output predictable.
          IConvertible number = (IConvertible) value;
          generator.Print(number.ToString(CultureInfo.InvariantCulture));
          break;
        }
      }
    }

    /// <summary>
    /// Prints every entry of <paramref name="unknownFields"/> to
    /// <paramref name="generator"/>, labelled by field number. Varints are
    /// printed in decimal, fixed32/fixed64 values as zero-padded hexadecimal
    /// with a "0x" prefix, length-delimited values as escaped quoted strings,
    /// and groups as nested braced blocks.
    /// </summary>
    private static void PrintUnknownFields(UnknownFieldSet unknownFields, TextGenerator generator) {
      foreach (KeyValuePair<int, UnknownField> entry in unknownFields.FieldDictionary) {
        // Numbers are formatted with the invariant culture, matching
        // PrintFieldValue, so output never depends on the thread's culture.
        string number = entry.Key.ToString(CultureInfo.InvariantCulture);
        string prefix = number + ": ";
        UnknownField field = entry.Value;

        foreach (ulong value in field.VarintList) {
          generator.Print(prefix);
          generator.Print(value.ToString(CultureInfo.InvariantCulture));
          generator.Print("\n");
        }
        foreach (uint value in field.Fixed32List) {
          generator.Print(prefix);
          generator.Print(string.Format(CultureInfo.InvariantCulture, "0x{0:x8}", value));
          generator.Print("\n");
        }
        foreach (ulong value in field.Fixed64List) {
          generator.Print(prefix);
          generator.Print(string.Format(CultureInfo.InvariantCulture, "0x{0:x16}", value));
          generator.Print("\n");
        }
        foreach (ByteString value in field.LengthDelimitedList) {
          generator.Print(number);
          generator.Print(": \"");
          generator.Print(EscapeBytes(value));
          generator.Print("\"\n");
        }
        foreach (UnknownFieldSet value in field.GroupList) {
          generator.Print(number);
          generator.Print(" {\n");
          generator.Indent();
          // Nested groups are printed recursively inside the indented block.
          PrintUnknownFields(value, generator);
          generator.Outdent();
          generator.Print("}\n");
        }
      }
    }

    /// <summary>
    /// Parses <paramref name="text"/> as an unsigned 64-bit integer
    /// (decimal, hex with leading 0x, or octal with leading 0).
    /// </summary>
    internal static ulong ParseUInt64(string text) {
      long bits = ParseInteger(text, false, true);
      return (ulong) bits;
    }

    /// <summary>
    /// Parses <paramref name="text"/> as a signed 64-bit integer
    /// (decimal, hex with leading 0x, or octal with leading 0).
    /// </summary>
    internal static long ParseInt64(string text) {
      long parsed = ParseInteger(text, true, true);
      return parsed;
    }

    /// <summary>
    /// Parses <paramref name="text"/> as an unsigned 32-bit integer
    /// (decimal, hex with leading 0x, or octal with leading 0).
    /// </summary>
    internal static uint ParseUInt32(string text) {
      long parsed = ParseInteger(text, false, false);
      return (uint) parsed;
    }

    /// <summary>
    /// Parses <paramref name="text"/> as a signed 32-bit integer
    /// (decimal, hex with leading 0x, or octal with leading 0).
    /// </summary>
    internal static int ParseInt32(string text) {
      long parsed = ParseInteger(text, true, false);
      return (int) parsed;
    }

    /// <summary>
    /// Parses an integer in hex (leading 0x), decimal (no prefix) or octal (leading 0).
    /// Only a negative sign is permitted, and it must come before the radix indicator.
    /// No whitespace, '+' sign or repeated sign/prefix is accepted.
    /// </summary>
    /// <param name="text">The text to parse.</param>
    /// <param name="isSigned">Whether negative values (a leading '-') are allowed.</param>
    /// <param name="isLong">True to range-check against 64 bits, false for 32 bits.</param>
    /// <returns>
    /// The parsed value. For unsigned 64-bit parsing the result is the bit
    /// pattern of the value, to be cast back to <c>ulong</c> by the caller.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
    /// <exception cref="FormatException">
    /// The text is not a well-formed number, is negative when
    /// <paramref name="isSigned"/> is false, or is out of range for the requested type.
    /// </exception>
    private static long ParseInteger(string text, bool isSigned, bool isLong) {
      if (text == null) {
        throw new ArgumentNullException("text");
      }

      string original = text;
      bool negative = false;
      // Ordinal comparisons: the prefixes are syntax, not linguistic text.
      if (text.StartsWith("-", StringComparison.Ordinal)) {
        if (!isSigned) {
          throw new FormatException("Number must be positive: " + original);
        }
        negative = true;
        text = text.Substring(1);
      }

      ulong result;
      try {
        if (text.StartsWith("0x", StringComparison.Ordinal)) {
          // AllowHexSpecifier accepts hex digits only: no second "0x" prefix,
          // no sign and no whitespace. An empty digit string is a FormatException.
          result = ulong.Parse(text.Substring(2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture);
        } else if (text.StartsWith("0", StringComparison.Ordinal)) {
          // The text is non-empty and starts with a digit here, so Convert
          // can only fail with FormatException or OverflowException.
          result = Convert.ToUInt64(text, 8);
        } else {
          // NumberStyles.None accepts decimal digits only, so a second sign
          // ("--1", "+1") or surrounding whitespace is rejected.
          result = ulong.Parse(text, NumberStyles.None, CultureInfo.InvariantCulture);
        }
      } catch (OverflowException) {
        // Convert OverflowException to FormatException so there's a single
        // exception type this method can throw for bad input.
        throw CreateOutOfRangeException(original, isSigned, isLong);
      }

      if (negative) {
        // The magnitude of the most negative value is one more than the
        // largest positive value of the same width.
        ulong limit = isLong ? 0x8000000000000000UL : 0x80000000UL;
        if (result > limit) {
          throw CreateOutOfRangeException(original, isSigned, isLong);
        }
        // Unchecked so that a magnitude of 2^63 maps to long.MinValue even
        // when the assembly is compiled with overflow checking enabled.
        return unchecked(-((long) result));
      }

      ulong max = isSigned
          ? (isLong ? (ulong) long.MaxValue : (ulong) int.MaxValue)
          : (isLong ? ulong.MaxValue : uint.MaxValue);
      if (result > max) {
        throw CreateOutOfRangeException(original, isSigned, isLong);
      }
      return unchecked((long) result);
    }

    /// <summary>
    /// Builds the exception reported when a number does not fit the requested integer type.
    /// </summary>
    private static FormatException CreateOutOfRangeException(string original, bool isSigned, bool isLong) {
      string numberDescription = (isLong ? "64" : "32") + "-bit " + (isSigned ? "" : "un") + "signed integer";
      return new FormatException("Number out of range for " + numberDescription + ": " + original);
    }

    /// <summary>
    /// Returns true if <paramref name="c"/> is one of the octal digits '0' through '7'.
    /// </summary>
    private static bool IsOctal(char c) {
      if (c < '0') {
        return false;
      }
      return c <= '7';
    }

    /// <summary>
    /// Returns true if <paramref name="c"/> is a hexadecimal digit:
    /// '0'-'9', 'a'-'f' or 'A'-'F'.
    /// </summary>
    private static bool IsHex(char c) {
      bool isDecimalDigit = c >= '0' && c <= '9';
      bool isLowerHexLetter = c >= 'a' && c <= 'f';
      bool isUpperHexLetter = c >= 'A' && c <= 'F';
      return isDecimalDigit || isLowerHexLetter || isUpperHexLetter;
    }

    /// <summary>
    /// Interprets a character as a digit (in any base up to 36) and returns the
    /// numeric value: '0'-'9' map to 0-9, and letters of either case map to 10-35.
    /// The character is not validated; any character that is neither a decimal
    /// digit nor a lower-case letter is treated as an upper-case letter.
    /// </summary>
    private static int ParseDigit(char c) {
      if (c >= '0' && c <= '9') {
        return c - '0';
      }
      bool isLowerCase = c >= 'a' && c <= 'z';
      int letterBase = isLowerCase ? 'a' : 'A';
      return c - letterBase + 10;
    }

    /// <summary>
    /// Unescapes a text string as escaped using <see cref="EscapeText(string)" />.
    /// Two-digit hex escapes (starting with "\x") are also recognised.
    /// The unescaped bytes are decoded as UTF-8.
    /// </summary>
    internal static string UnescapeText(string input) {
      ByteString rawBytes = UnescapeBytes(input);
      return rawBytes.ToStringUtf8();
    }

    /// <summary>
    /// Like <see cref="EscapeBytes" /> but escapes a text string.
    /// The string is first encoded as UTF-8, then each byte is escaped individually.
    /// The returned value is guaranteed to be entirely ASCII.
    /// </summary>
    internal static string EscapeText(string input) {
      ByteString utf8Bytes = ByteString.CopyFromUtf8(input);
      return EscapeBytes(utf8Bytes);
    }
    /// <summary>
    /// Escapes bytes in the format used in protocol buffer text format, which
    /// is the same as the format used for C string literals. Backslash,
    /// single-quote and double-quote are escaped, as are the control bytes
    /// that have a short-hand escape (\a \b \f \n \r \t \v). Every other byte
    /// below 0x20 or at/above 0x80 is written as a 3-digit octal escape;
    /// remaining bytes are copied through unchanged.
    /// The returned value is guaranteed to be entirely ASCII.
    /// </summary>
    internal static String EscapeBytes(ByteString input) {
      StringBuilder escaped = new StringBuilder(input.Length);
      foreach (byte current in input) {
        switch (current) {
          // Bytes with a dedicated short-hand escape sequence.
          case 0x07: escaped.Append("\\a" ); continue;
          case 0x0b: escaped.Append("\\v" ); continue;
          case (byte)'\t': escaped.Append("\\t" ); continue;
          case (byte)'\n': escaped.Append("\\n" ); continue;
          case (byte)'\r': escaped.Append("\\r" ); continue;
          case (byte)'\b': escaped.Append("\\b" ); continue;
          case (byte)'\f': escaped.Append("\\f" ); continue;
          case (byte)'"' : escaped.Append("\\\""); continue;
          case (byte)'\'': escaped.Append("\\\'"); continue;
          case (byte)'\\': escaped.Append("\\\\"); continue;
        }

        if (current >= 0x20 && current < 128) {
          // Copied through as-is.
          escaped.Append((char) current);
          continue;
        }

        // Anything else becomes a backslash followed by three octal digits.
        int highDigit = (current >> 6) & 3;
        int middleDigit = (current >> 3) & 7;
        int lowDigit = current & 7;
        escaped.Append('\\')
               .Append((char) ('0' + highDigit))
               .Append((char) ('0' + middleDigit))
               .Append((char) ('0' + lowDigit));
      }
      return escaped.ToString();
    }

    /// <summary>
    /// Performs string unescaping from C style (octal, hex, form feeds, tab etc) into a byte string.
    /// Characters other than backslash are copied through as single bytes; a backslash
    /// introduces an escape: one to three octal digits, "x" followed by one or two hex
    /// digits, or one of a b f n r t v \ ' ".
    /// </summary>
    /// <param name="input">The escaped text; every character must be in the range 32 to 127.</param>
    /// <returns>A byte string holding the unescaped bytes.</returns>
    /// <exception cref="FormatException">
    /// The input contains a character outside the range 32 to 127, ends with a lone
    /// backslash, contains "\x" with no hex digit after it, or contains an unrecognised escape.
    /// </exception>
    internal static ByteString UnescapeBytes(string input) {
      // Each input character yields at most one output byte, so input.Length
      // is an upper bound on the output size; pos tracks the bytes actually written.
      byte[] result = new byte[input.Length];
      int pos = 0;
      for (int i = 0; i < input.Length; i++) {
        char c = input[i];
        if (c > 127 || c < 32) {
          throw new FormatException("Escaped string must only contain ASCII");
        }
        if (c != '\\') {
          // Ordinary character: copy it through unchanged.
          result[pos++] = (byte) c;
          continue;
        }
        if (i + 1 >= input.Length) {
          throw new FormatException("Invalid escape sequence: '\\' at end of string.");
        }

        // Move on to the character that selects the kind of escape.
        i++;
        c = input[i];
        if (c >= '0' && c <= '7') {
          // Octal escape: the first digit plus up to two more octal digits.
          int code = ParseDigit(c);
          if (i + 1 < input.Length && IsOctal(input[i+1])) {
            i++;
            code = code * 8 + ParseDigit(input[i]);
          }
          if (i + 1 < input.Length && IsOctal(input[i+1])) {
            i++;
            code = code * 8 + ParseDigit(input[i]);
          }
          // NOTE(review): three octal digits can encode values up to 511; the
          // cast keeps only the low 8 bits unless overflow checking is enabled.
          result[pos++] = (byte) code;
        } else {
          switch (c) {
            // Single-character escapes. \a and \v are written as numeric byte values.
            case 'a': result[pos++] = 0x07; break;
            case 'b': result[pos++] = (byte) '\b'; break;
            case 'f': result[pos++] = (byte) '\f'; break;
            case 'n': result[pos++] = (byte) '\n'; break;
            case 'r': result[pos++] = (byte) '\r'; break;
            case 't': result[pos++] = (byte) '\t'; break;
            case 'v': result[pos++] = 0x0b; break;
            case '\\': result[pos++] = (byte) '\\'; break;
            case '\'': result[pos++] = (byte) '\''; break;
            case '"': result[pos++] = (byte) '\"'; break;

            case 'x':
              // hex escape: at least one hex digit is required, a second is optional
              int code;
              if (i + 1 < input.Length && IsHex(input[i+1])) {
                i++;
                code = ParseDigit(input[i]);
              } else {
                throw new FormatException("Invalid escape sequence: '\\x' with no digits");
              }
              if (i + 1 < input.Length && IsHex(input[i+1])) {
                ++i;
                code = code * 16 + ParseDigit(input[i]);
              }
              result[pos++] = (byte)code;
              break;

            default:
              throw new FormatException("Invalid escape sequence: '\\" + c + "'");
          }
        }
      }

      // Only the first pos bytes of the buffer were filled in.
      return ByteString.CopyFrom(result, 0, pos);
    }

    /// <summary>
    /// Parses text-format <paramref name="text"/> and merges its fields into
    /// <paramref name="builder"/>, using an empty extension registry.
    /// </summary>
    public static void Merge(string text, IBuilder builder) {
      ExtensionRegistry noExtensions = ExtensionRegistry.Empty;
      Merge(text, noExtensions, builder);
    }

    /// <summary>
    /// Parses the text-format data supplied by <paramref name="reader"/> and merges
    /// its fields into <paramref name="builder"/>, using an empty extension registry.
    /// </summary>
    public static void Merge(TextReader reader, IBuilder builder) {
      ExtensionRegistry noExtensions = ExtensionRegistry.Empty;
      Merge(reader, noExtensions, builder);
    }

    /// <summary>
    /// Reads all remaining text from <paramref name="reader"/>, parses it as text
    /// format and merges its fields into <paramref name="builder"/>, resolving
    /// extensions through <paramref name="registry"/>.
    /// </summary>
    public static void Merge(TextReader reader, ExtensionRegistry registry, IBuilder builder) {
      string text = reader.ReadToEnd();
      Merge(text, registry, builder);
    }

    /// <summary>
    /// Parses text-format <paramref name="text"/> and merges each field found into
    /// <paramref name="builder"/>, resolving extensions through
    /// <paramref name="registry"/>. Parsing continues until the tokenizer
    /// reports the end of the input.
    /// </summary>
    public static void Merge(string text, ExtensionRegistry registry, IBuilder builder) {
      for (TextTokenizer tokens = new TextTokenizer(text); !tokens.AtEnd; ) {
        MergeField(tokens, registry, builder);
      }
    }

    /// <summary>
    /// Parses a single field from the specified tokenizer and merges it into
    /// the builder. The field is identified either by a bracketed extension name
    /// looked up in <paramref name="extensionRegistry"/>, or by an identifier looked
    /// up in the builder's message descriptor. Message-typed fields are parsed
    /// recursively from a block delimited by "{" "}" or "&lt;" "&gt;"; other fields are
    /// parsed from the token following a ":".
    /// </summary>
    /// <param name="tokenizer">The tokenizer positioned at the start of a field.</param>
    /// <param name="extensionRegistry">The registry used to resolve bracketed extension names.</param>
    /// <param name="builder">The builder that receives the parsed value.</param>
    private static void MergeField(TextTokenizer tokenizer, ExtensionRegistry extensionRegistry,
        IBuilder builder) {

      FieldDescriptor field;
      MessageDescriptor type = builder.DescriptorForType;
      // Stays null unless the field is named using the bracketed extension syntax.
      ExtensionInfo extension = null;

      if (tokenizer.TryConsume("[")) {
        // An extension. Its name is a sequence of identifiers separated by dots.
        StringBuilder name = new StringBuilder(tokenizer.ConsumeIdentifier());
        while (tokenizer.TryConsume(".")) {
          name.Append(".");
          name.Append(tokenizer.ConsumeIdentifier());
        }

        extension = extensionRegistry[name.ToString()];

        // The extension must be registered, and must extend the type being built.
        if (extension == null) {
          throw tokenizer.CreateFormatExceptionPreviousToken("Extension \"" + name + "\" not found in the ExtensionRegistry.");
        } else if (extension.Descriptor.ContainingType != type) {
          throw tokenizer.CreateFormatExceptionPreviousToken("Extension \"" + name + "\" does not extend message type \"" +
            type.FullName + "\".");
        }

        tokenizer.Consume("]");

        field = extension.Descriptor;
      } else {
        String name = tokenizer.ConsumeIdentifier();
        field = type.FindDescriptor<FieldDescriptor>(name);

        // Group names are expected to be capitalized as they appear in the
        // .proto file, which actually matches their type names, not their field
        // names.
        if (field == null) {
          // Explicitly specify the invariant culture so that this code does not break when
          // executing in Turkey.
          String lowerName = name.ToLowerInvariant();
          field = type.FindDescriptor<FieldDescriptor>(lowerName);
          // If the lower-cased lookup worked but the field is NOT a group,
          // discard the match: only group fields are looked up this way.
          if (field != null && field.FieldType != FieldType.Group) {
            field = null;
          }
        }
        // Again, special-case group names as described above: a group is only
        // accepted when the identifier matches its message type name exactly.
        if (field != null && field.FieldType == FieldType.Group && field.MessageType.Name != name) {
          field = null;
        }

        if (field == null) {
          throw tokenizer.CreateFormatExceptionPreviousToken(
              "Message type \"" + type.FullName + "\" has no field named \"" + name + "\".");
        }
      }

      object value = null;

      if (field.MappedType == MappedType.Message) {
        tokenizer.TryConsume(":");  // optional

        // A sub-message may be delimited by either <...> or {...}.
        String endToken;
        if (tokenizer.TryConsume("<")) {
          endToken = ">";
        } else {
          tokenizer.Consume("{");
          endToken = "}";
        }

        // Ordinary fields get their sub-builder from the parent builder;
        // extensions get it from the extension's default instance.
        IBuilder subBuilder;
        if (extension == null) {
          subBuilder = builder.CreateBuilderForField(field);
        } else {
          subBuilder = extension.DefaultInstance.WeakCreateBuilderForType();
        }

        // Merge nested fields until the closing delimiter is consumed.
        while (!tokenizer.TryConsume(endToken)) {
          if (tokenizer.AtEnd) {
            throw tokenizer.CreateFormatException("Expected \"" + endToken + "\".");
          }
          MergeField(tokenizer, extensionRegistry, subBuilder);
        }

        value = subBuilder.WeakBuild();

      } else {
        // Non-message fields always require the ":" separator.
        tokenizer.Consume(":");

        switch (field.FieldType) {
          case FieldType.Int32:
          case FieldType.SInt32:
          case FieldType.SFixed32:
            value = tokenizer.ConsumeInt32();
            break;

          case FieldType.Int64:
          case FieldType.SInt64:
          case FieldType.SFixed64:
            value = tokenizer.ConsumeInt64();
            break;

          case FieldType.UInt32:
          case FieldType.Fixed32:
            value = tokenizer.ConsumeUInt32();
            break;

          case FieldType.UInt64:
          case FieldType.Fixed64:
            value = tokenizer.ConsumeUInt64();
            break;

          case FieldType.Float:
            value = tokenizer.ConsumeFloat();
            break;

          case FieldType.Double:
            value = tokenizer.ConsumeDouble();
            break;

          case FieldType.Bool:
            value = tokenizer.ConsumeBoolean();
            break;

          case FieldType.String:
            value = tokenizer.ConsumeString();
            break;

          case FieldType.Bytes:
            value = tokenizer.ConsumeByteString();
            break;

          case FieldType.Enum: {
            EnumDescriptor enumType = field.EnumType;

            // Enum values may be given either by number or by name.
            if (tokenizer.LookingAtInteger()) {
              int number = tokenizer.ConsumeInt32();
              value = enumType.FindValueByNumber(number);
              if (value == null) {
                throw tokenizer.CreateFormatExceptionPreviousToken(
                  "Enum type \"" + enumType.FullName +
                  "\" has no value with number " + number + ".");
              }
            } else {
              String id = tokenizer.ConsumeIdentifier();
              value = enumType.FindValueByName(id);
              if (value == null) {
                throw tokenizer.CreateFormatExceptionPreviousToken(
                  "Enum type \"" + enumType.FullName +
                  "\" has no value named \"" + id + "\".");
              }
            }

            break;
          }

          case FieldType.Message:
          case FieldType.Group:
            // NOTE(review): presumably unreachable because these field types map to
            // MappedType.Message, which the branch above handles - confirm.
            throw new InvalidOperationException("Can't get here.");
        }
      }

      // Repeated fields accumulate values; any other field is set to the parsed value.
      if (field.IsRepeated) {
        builder.WeakAddRepeatedField(field, value);
      } else {
        builder.SetField(field, value);
      }
    }
  }
}