From 1353315dede3e22266df73fca8dd421119597e46 Mon Sep 17 00:00:00 2001 From: Jon Skeet Date: Thu, 14 Aug 2008 20:35:20 +0100 Subject: Implemented TextFormatter --- csharp/ProtocolBuffers/ByteString.cs | 12 +- csharp/ProtocolBuffers/ProtocolBuffers.csproj | 1 + csharp/ProtocolBuffers/TextFormat.cs | 397 +++++++++++++++++++++++++- csharp/ProtocolBuffers/TextGenerator.cs | 80 ++++++ 4 files changed, 475 insertions(+), 15 deletions(-) create mode 100644 csharp/ProtocolBuffers/TextGenerator.cs (limited to 'csharp') diff --git a/csharp/ProtocolBuffers/ByteString.cs b/csharp/ProtocolBuffers/ByteString.cs index 94eb45e3..7c644f9a 100644 --- a/csharp/ProtocolBuffers/ByteString.cs +++ b/csharp/ProtocolBuffers/ByteString.cs @@ -15,13 +15,15 @@ // limitations under the License. using System.Text; using System; +using System.Collections.Generic; +using System.Collections; namespace Google.ProtocolBuffers { /// /// Immutable array of bytes. /// TODO(jonskeet): Implement the common collection interfaces? /// - public sealed class ByteString { + public sealed class ByteString : IEnumerable { private static readonly ByteString empty = new ByteString(new byte[0]); @@ -105,6 +107,14 @@ namespace Google.ProtocolBuffers { return ToString(Encoding.UTF8); } + public IEnumerator GetEnumerator() { + return ((IEnumerable) bytes).GetEnumerator(); + } + + IEnumerator IEnumerable.GetEnumerator() { + return GetEnumerator(); + } + /// /// Creates a CodedInputStream from this ByteString's data. 
/// diff --git a/csharp/ProtocolBuffers/ProtocolBuffers.csproj b/csharp/ProtocolBuffers/ProtocolBuffers.csproj index 706d2eaf..bcfe9f2f 100644 --- a/csharp/ProtocolBuffers/ProtocolBuffers.csproj +++ b/csharp/ProtocolBuffers/ProtocolBuffers.csproj @@ -87,6 +87,7 @@ + diff --git a/csharp/ProtocolBuffers/TextFormat.cs b/csharp/ProtocolBuffers/TextFormat.cs index 87c5e211..efa1ba94 100644 --- a/csharp/ProtocolBuffers/TextFormat.cs +++ b/csharp/ProtocolBuffers/TextFormat.cs @@ -1,35 +1,404 @@ using System; using System.Collections.Generic; +using System.IO; using System.Text; +using Google.ProtocolBuffers.Descriptors; +using System.Collections; namespace Google.ProtocolBuffers { - public class TextFormat { + /// + /// Provides ASCII text formatting support for messages. + /// TODO(jonskeet): Parsing support. + /// + public static class TextFormat { + + /// + /// Outputs a textual representation of the Protocol Message supplied into + /// the parameter output. + /// + public static void Print(IMessage message, TextWriter output) { + TextGenerator generator = new TextGenerator(output); + Print(message, generator); + } + + /// + /// Outputs a textual representation of to . 
+    ///
+    ///
+    ///
+    public static void Print(UnknownFieldSet fields, TextWriter output) {
+      TextGenerator generator = new TextGenerator(output);
+      PrintUnknownFields(fields, generator);
+    }
+
+    /// <summary>
+    /// Returns the textual representation of <paramref name="message"/> as a string.
+    /// </summary>
     public static string PrintToString(IMessage message) {
-      throw new NotImplementedException();
+      StringWriter text = new StringWriter();
+      Print(message, text);
+      return text.ToString();
+    }
+
+    /// <summary>
+    /// Returns the textual representation of <paramref name="fields"/> as a string.
+    /// </summary>
+    public static string PrintToString(UnknownFieldSet fields) {
+      StringWriter text = new StringWriter();
+      Print(fields, text);
+      return text.ToString();
+    }
+
+    /// <summary>
+    /// Prints every entry of <c>message.AllFields</c>, followed by the message's
+    /// unknown fields, to <paramref name="generator"/>.
+    /// </summary>
+    private static void Print(IMessage message, TextGenerator generator) {
+      foreach (KeyValuePair<FieldDescriptor, object> entry in message.AllFields) {
+        PrintField(entry.Key, entry.Value, generator);
+      }
+      PrintUnknownFields(message.UnknownFields, generator);
+    }
+
+    /// <summary>
+    /// Prints a field. For a repeated field, <paramref name="value"/> is enumerated
+    /// and each element is printed as a field of its own.
+    /// </summary>
+    internal static void PrintField(FieldDescriptor field, object value, TextGenerator generator) {
+      if (field.IsRepeated) {
+        // Repeated field. Print each element.
+        foreach (object element in (IEnumerable) value) {
+          PrintSingleField(field, element, generator);
+        }
+      } else {
+        PrintSingleField(field, value, generator);
+      }
+    }
+
+    /// <summary>
+    /// Prints one name/value pair on its own line. Message-typed values are
+    /// wrapped in braces and indented; every other value follows ": ".
+    /// </summary>
+    private static void PrintSingleField(FieldDescriptor field, Object value, TextGenerator generator) {
+      if (field.IsExtension) {
+        generator.Print("[");
+        // We special-case MessageSet elements for compatibility with proto1.
+        if (field.ContainingType.Options.MessageSetWireFormat
+            && field.FieldType == FieldType.Message
+            && field.IsOptional
+            // object equality (TODO(jonskeet): Work out what this comment means!)
+            && field.ExtensionScope == field.MessageType) {
+          generator.Print(field.MessageType.FullName);
+        } else {
+          generator.Print(field.FullName);
+        }
+        generator.Print("]");
+      } else {
+        if (field.FieldType == FieldType.Group) {
+          // Groups must be serialized with their original capitalization.
+          generator.Print(field.MessageType.Name);
+        } else {
+          generator.Print(field.Name);
+        }
+      }
+
+      bool isMessage = field.MappedType == MappedType.Message;
+      if (isMessage) {
+        generator.Print(" {\n");
+        generator.Indent();
+      } else {
+        generator.Print(": ");
+      }
+
+      PrintFieldValue(field, value, generator);
+
+      if (isMessage) {
+        generator.Outdent();
+        generator.Print("}");
+      }
+      generator.Print("\n");
+    }
+
+    /// <summary>
+    /// Prints just the value of a field, formatted according to the field's type.
+    /// Field types not listed in the switch print nothing.
+    /// </summary>
+    private static void PrintFieldValue(FieldDescriptor field, object value, TextGenerator generator) {
+      switch (field.FieldType) {
+        case FieldType.Int32:
+        case FieldType.Int64:
+        case FieldType.SInt32:
+        case FieldType.SInt64:
+        case FieldType.SFixed32:
+        case FieldType.SFixed64:
+        case FieldType.Float:
+        case FieldType.Double:
+        case FieldType.UInt32:
+        case FieldType.UInt64:
+        case FieldType.Fixed32:
+        case FieldType.Fixed64:
+          // Format with the invariant culture so that the output does not depend
+          // on the current thread's culture (e.g. ',' as the decimal separator).
+          // NOTE(review): assumes the boxed value is a .NET primitive numeric,
+          // all of which implement IConvertible - confirm against the callers.
+          generator.Print(((IConvertible) value).ToString(
+              System.Globalization.CultureInfo.InvariantCulture));
+          break;
+        case FieldType.Bool:
+          // Explicitly use the Java true/false
+          generator.Print((bool) value ? "true" : "false");
+          break;
+
+        case FieldType.String:
+          generator.Print("\"");
+          generator.Print(EscapeText((string) value));
+          generator.Print("\"");
+          break;
+
+        case FieldType.Bytes: {
+          generator.Print("\"");
+          generator.Print(EscapeBytes((ByteString) value));
+          generator.Print("\"");
+          break;
+        }
+
+        case FieldType.Enum: {
+          generator.Print(((EnumValueDescriptor) value).Name);
+          break;
+        }
+
+        case FieldType.Message:
+        case FieldType.Group:
+          Print((IMessage) value, generator);
+          break;
+      }
     }
 
-    internal static string PrintToString(UnknownFieldSet unknownFieldSet) {
-      throw new NotImplementedException();
+    /// <summary>
+    /// Prints every unknown field, keyed by its number: varints in decimal, fixed32
+    /// and fixed64 values in zero-padded hex, length-delimited values as escaped
+    /// strings and groups as nested, indented blocks.
+    /// </summary>
+    private static void PrintUnknownFields(UnknownFieldSet unknownFields, TextGenerator generator) {
+      // NOTE(review): the key type is assumed to be the int field number - confirm
+      // against UnknownFieldSet.FieldDictionary.
+      foreach (KeyValuePair<int, UnknownField> entry in unknownFields.FieldDictionary) {
+        string number = entry.Key.ToString();
+        UnknownField field = entry.Value;
+
+        foreach (ulong value in field.VarintList) {
+          generator.Print(number);
+          generator.Print(": ");
+          generator.Print(value.ToString());
+          generator.Print("\n");
+        }
+        foreach (uint value in field.Fixed32List) {
+          generator.Print(number);
+          generator.Print(": ");
+          // Zero-padded to the full 32-bit width, like the Java implementation's %08x.
+          generator.Print(string.Format("0x{0:x8}", value));
+          generator.Print("\n");
+        }
+        foreach (ulong value in field.Fixed64List) {
+          generator.Print(number);
+          generator.Print(": ");
+          // Zero-padded to the full 64-bit width, like the Java implementation's %016x.
+          generator.Print(string.Format("0x{0:x16}", value));
+          generator.Print("\n");
+        }
+        foreach (ByteString value in field.LengthDelimitedList) {
+          generator.Print(number);
+          generator.Print(": \"");
+          generator.Print(EscapeBytes(value));
+          generator.Print("\"\n");
+        }
+        foreach (UnknownFieldSet value in field.GroupList) {
+          generator.Print(number);
+          generator.Print(" {\n");
+          generator.Indent();
+          PrintUnknownFields(value, generator);
+          generator.Outdent();
+          generator.Print("}\n");
+        }
+      }
     }
 
-    internal static object ParseUInt64(string p) {
-      throw new NotImplementedException();
+
+    /// <summary>
+    /// Parses <paramref name="text"/> as an unsigned 64-bit integer.
+    /// </summary>
+    internal static ulong ParseUInt64(string text) {
+      return (ulong) ParseInteger(text, false, true);
     }
 
-    internal static object ParseInt64(string p) {
-      throw new NotImplementedException();
+    /// <summary>
+    /// Parses <paramref name="text"/> as a signed 64-bit integer.
+    /// </summary>
+    internal static long ParseInt64(string text) {
+      return ParseInteger(text, true, true);
     }
 
-    internal static object ParseUInt32(string p) {
-      throw new NotImplementedException();
+    /// <summary>
+    /// Parses <paramref name="text"/> as an unsigned 32-bit integer.
+    /// </summary>
+    internal static uint ParseUInt32(string text) {
+      return (uint) ParseInteger(text, false, false);
     }
 
-    internal static object ParseInt32(string p) {
-      throw new NotImplementedException();
+    /// <summary>
+    /// Parses <paramref name="text"/> as a signed 32-bit integer.
+    /// </summary>
+    internal static int ParseInt32(string text) {
+      return (int) ParseInteger(text, true, false);
     }
 
-    internal static object UnescapeBytes(string p) {
-      throw new NotImplementedException();
+    ///
+    /// Parses an integer in hex (leading 0x), decimal (no prefix) or octal (leading 0).
+    /// Only a negative sign is permitted, and it must come before the radix indicator.
+    ///
+    /// <param name="text">The text to parse.</param>
+    /// <param name="isSigned">Whether a negative value is acceptable.</param>
+    /// <param name="isLong">Whether the target is 64 bits wide rather than 32.</param>
+    /// <returns>
+    /// The parsed value. An unsigned 64-bit result above long.MaxValue is returned
+    /// as the long with the same bit pattern, so callers cast it back to ulong.
+    /// </returns>
+    /// <exception cref="FormatException">
+    /// The text is not a valid number, or is out of range for the requested type.
+    /// </exception>
+    private static long ParseInteger(string text, bool isSigned, bool isLong) {
+      string original = text;
+      bool negative = false;
+      if (text.StartsWith("-", StringComparison.Ordinal)) {
+        if (!isSigned) {
+          throw new FormatException("Number must be positive: " + original);
+        }
+        negative = true;
+        text = text.Substring(1);
+      }
+
+      int radix = 10;
+      if (text.StartsWith("0x", StringComparison.Ordinal)) {
+        radix = 16;
+        text = text.Substring(2);
+      } else if (text.Length > 1 && text[0] == '0') {
+        // A lone "0" is plain zero, not an octal prefix with no digits after it.
+        radix = 8;
+        text = text.Substring(1);
+      }
+
+      // Validate up front so that every malformed input fails in the same way;
+      // Convert.ToUInt64 would otherwise accept a second sign or radix prefix,
+      // or fail with a different exception type for an empty string.
+      if (text.Length == 0) {
+        throw new FormatException("Invalid number: " + original);
+      }
+      foreach (char c in text) {
+        bool valid = radix == 16 ? IsHex(c)
+            : radix == 8 ? IsOctal(c)
+            : ('0' <= c && c <= '9');
+        if (!valid) {
+          throw new FormatException("Invalid number: " + original);
+        }
+      }
+
+      ulong result;
+      try {
+        result = Convert.ToUInt64(text, radix);
+      } catch (OverflowException) {
+        // Keep FormatException as the only failure type of this method.
+        throw new FormatException("Number out of range: " + original);
+      }
+
+      if (negative) {
+        // The magnitude of the most negative value is one more than the maximum.
+        ulong max = isLong ? 0x8000000000000000UL : 0x80000000UL;
+        if (result > max) {
+          throw new FormatException("Number out of range: " + original);
+        }
+        // Unchecked so that the most negative 64-bit value wraps to long.MinValue.
+        return unchecked(-((long) result));
+      } else {
+        ulong max = isSigned
+            ? (isLong ? (ulong) long.MaxValue : (ulong) int.MaxValue)
+            : (isLong ? ulong.MaxValue : (ulong) uint.MaxValue);
+        if (result > max) {
+          throw new FormatException("Number out of range: " + original);
+        }
+        return unchecked((long) result);
+      }
+    }
+
+    /// <summary>
+    /// Tests a character to see if it's an octal digit.
+    /// </summary>
+    private static bool IsOctal(char c) {
+      return '0' <= c && c <= '7';
+    }
+
+    /// <summary>
+    /// Tests a character to see if it's a hex digit.
+    /// </summary>
+    private static bool IsHex(char c) {
+      return ('0' <= c && c <= '9') ||
+             ('a' <= c && c <= 'f') ||
+             ('A' <= c && c <= 'F');
+    }
+
+    /// <summary>
+    /// Interprets a character as a digit (in any base up to 36) and returns the
+    /// numeric value. The character is not validated: anything that is not a
+    /// decimal digit or a lower-case letter is treated as an upper-case letter,
+    /// so callers must check the character first.
+    /// </summary>
+    private static int ParseDigit(char c) {
+      if ('0' <= c && c <= '9') {
+        return c - '0';
+      } else if ('a' <= c && c <= 'z') {
+        return c - 'a' + 10;
+      } else {
+        return c - 'A' + 10;
+      }
+    }
+
+    ///
+    /// Like EscapeBytes, but escapes a text string.
+    /// The string is first encoded as UTF-8, then each byte escaped individually.
+    /// The returned value is guaranteed to be entirely ASCII.
+    ///
+    static String EscapeText(string input) {
+      return EscapeBytes(ByteString.CopyFromUtf8(input));
+    }
+    /// <summary>
+    /// Escapes bytes in the format used in protocol buffer text format, which
+    /// is the same as the format used for C string literals. All bytes
+    /// that are not printable 7-bit ASCII characters are escaped, as well as
+    /// backslash, single-quote, and double-quote characters. Characters for
+    /// which no short-hand escape sequence is defined will be escaped
+    /// using 3-digit octal sequences.
+    /// The returned value is guaranteed to be entirely ASCII.
+    /// </summary>
+    private static String EscapeBytes(ByteString input) {
+      StringBuilder builder = new StringBuilder(input.Length);
+      foreach (byte b in input) {
+        switch (b) {
+          // Bell (\a) and vertical tab (\v) are matched by their numeric values.
+          case 0x07:       builder.Append("\\a" ); break;
+          case (byte)'\b': builder.Append("\\b" ); break;
+          case (byte)'\f': builder.Append("\\f" ); break;
+          case (byte)'\n': builder.Append("\\n" ); break;
+          case (byte)'\r': builder.Append("\\r" ); break;
+          case (byte)'\t': builder.Append("\\t" ); break;
+          case 0x0b:       builder.Append("\\v" ); break;
+          case (byte)'\\': builder.Append("\\\\"); break;
+          case (byte)'\'': builder.Append("\\\'"); break;
+          case (byte)'"' : builder.Append("\\\""); break;
+          default:
+            // Only printable ASCII is copied through. C# bytes are unsigned, so
+            // values of 0x80 and above need an explicit upper bound here;
+            // UnescapeBytes rejects any character above 127.
+            if (b >= 0x20 && b < 0x7f) {
+              builder.Append((char) b);
+            } else {
+              builder.Append('\\');
+              builder.Append((char) ('0' + ((b >> 6) & 3)));
+              builder.Append((char) ('0' + ((b >> 3) & 7)));
+              builder.Append((char) ('0' + (b & 7)));
+            }
+            break;
+        }
+      }
+      return builder.ToString();
+    }
+
+    ///
+    /// Performs string unescaping from C style (octal, hex, form feeds, tab etc) into a byte string.
+    ///
+    internal static ByteString UnescapeBytes(string input) {
+      // Every escape sequence is at least as long as the byte it produces, so
+      // the input length is an upper bound for the output length.
+      byte[] buffer = new byte[input.Length];
+      int count = 0;
+      int index = 0;
+      while (index < input.Length) {
+        char current = input[index];
+        if (current < 32 || current > 127) {
+          throw new FormatException("Escaped string must only contain ASCII");
+        }
+        if (current != '\\') {
+          // Ordinary character: copied through unchanged.
+          buffer[count++] = (byte) current;
+          index++;
+          continue;
+        }
+        if (index + 1 >= input.Length) {
+          throw new FormatException("Invalid escape sequence: '\\' at end of string.");
+        }
+
+        index++;
+        char escape = input[index];
+        if ('0' <= escape && escape <= '7') {
+          // Octal escape: one digit, optionally followed by up to two more.
+          int octal = ParseDigit(escape);
+          for (int extra = 0;
+               extra < 2 && index + 1 < input.Length && IsOctal(input[index + 1]);
+               extra++) {
+            index++;
+            octal = octal * 8 + ParseDigit(input[index]);
+          }
+          buffer[count++] = (byte) octal;
+        } else if (escape == 'x') {
+          // Hex escape: one digit, optionally followed by a second.
+          if (index + 1 >= input.Length || !IsHex(input[index + 1])) {
+            throw new FormatException("Invalid escape sequence: '\\x' with no digits");
+          }
+          index++;
+          int hex = ParseDigit(input[index]);
+          if (index + 1 < input.Length && IsHex(input[index + 1])) {
+            index++;
+            hex = hex * 16 + ParseDigit(input[index]);
+          }
+          buffer[count++] = (byte) hex;
+        } else {
+          // Single-character escape.
+          byte literal;
+          switch (escape) {
+            case 'a':  literal = 0x07;        break;
+            case 'b':  literal = (byte) '\b'; break;
+            case 'f':  literal = (byte) '\f'; break;
+            case 'n':  literal = (byte) '\n'; break;
+            case 'r':  literal = (byte) '\r'; break;
+            case 't':  literal = (byte) '\t'; break;
+            case 'v':  literal = 0x0b;        break;
+            case '\\': literal = (byte) '\\'; break;
+            case '\'': literal = (byte) '\''; break;
+            case '"':  literal = (byte) '\"'; break;
+            default:
+              throw new FormatException("Invalid escape sequence: '\\" + escape + "'");
+          }
+          buffer[count++] = literal;
+        }
+        index++;
+      }
+
+      return ByteString.CopyFrom(buffer, 0, count);
     }
   }
 }
diff --git a/csharp/ProtocolBuffers/TextGenerator.cs b/csharp/ProtocolBuffers/TextGenerator.cs
new file mode 100644
index 00000000..724c1a8f
--- /dev/null
+++
b/csharp/ProtocolBuffers/TextGenerator.cs
@@ -0,0 +1,98 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+
+namespace Google.ProtocolBuffers {
+
+  /// <summary>
+  /// Helper class to control indentation when writing text to a TextWriter.
+  /// </summary>
+  internal class TextGenerator {
+
+    /// <summary>
+    /// Text added to, or removed from, the indent for each level: two spaces.
+    /// </summary>
+    private const string IndentUnit = "  ";
+
+    /// <summary>
+    /// Writer to write formatted text to.
+    /// </summary>
+    private readonly TextWriter writer;
+
+    /// <summary>
+    /// Keeps track of whether the next piece of text should be indented.
+    /// </summary>
+    bool atStartOfLine = true;
+
+    /// <summary>
+    /// Keeps track of the current level of indentation.
+    /// </summary>
+    readonly StringBuilder indent = new StringBuilder();
+
+    /// <summary>
+    /// Creates a generator writing to the given writer.
+    /// </summary>
+    internal TextGenerator(TextWriter writer) {
+      this.writer = writer;
+    }
+
+    /// <summary>
+    /// Indents text by two spaces. After calling Indent(), two spaces
+    /// will be inserted at the beginning of each line of text. Indent() may
+    /// be called multiple times to produce deeper indents.
+    /// </summary>
+    internal void Indent() {
+      indent.Append(IndentUnit);
+    }
+
+    /// <summary>
+    /// Reduces the current indent level by two spaces.
+    /// </summary>
+    /// <exception cref="InvalidOperationException">
+    /// There is no indentation left to remove.
+    /// </exception>
+    internal void Outdent() {
+      if (indent.Length < IndentUnit.Length) {
+        throw new InvalidOperationException("Too many calls to Outdent()");
+      }
+      indent.Length -= IndentUnit.Length;
+    }
+
+    /// <summary>
+    /// Prints the given text to the output stream, indenting at line boundaries.
+    /// </summary>
+    /// <param name="text">Text to print; it may contain any number of '\n' characters.</param>
+    public void Print(string text) {
+      int pos = 0;
+
+      for (int i = 0; i < text.Length; i++) {
+        if (text[i] == '\n') {
+          // TODO(jonskeet): Use Environment.NewLine?
+          // Write the segment together with its newline; the next write starts a line.
+          Write(text.Substring(pos, i - pos + 1));
+          pos = i + 1;
+          atStartOfLine = true;
+        }
+      }
+      // Whatever follows the last newline; this is empty when the text ends with '\n'.
+      Write(text.Substring(pos));
+    }
+
+    /// <summary>
+    /// Writes one segment of text, preceded by the current indent when at the start of a line.
+    /// </summary>
+    private void Write(string data) {
+      // An empty segment must not emit the indent or clear the start-of-line flag:
+      // the indent level may still change before the next real text is written.
+      if (data.Length == 0) {
+        return;
+      }
+      if (atStartOfLine) {
+        atStartOfLine = false;
+        writer.Write(indent.ToString());
+      }
+      writer.Write(data);
+    }
+  }
+}
-- cgit v1.2.3