using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using Google.ProtocolBuffers.Descriptors;
namespace Google.ProtocolBuffers.ProtoMunge
{
/// <summary>
/// Utility console application which takes a message descriptor and a corresponding message,
/// and produces a new message with similar but random data. The data is the same length
/// as the original, but with random values within appropriate bands. (For instance, a compressed
/// integer in the range 0-127 will end up as another integer in the same range, to keep the length
/// the same.)
/// TODO(jonskeet): Potentially refactor to use an instance instead, making it simpler to
/// be thread-safe for external use.
/// </summary>
public sealed class Program {
static readonly Random rng = new Random();
/// <summary>
/// Entry point: parses a serialized message from a file, munges it, and writes the
/// munged message to another file.
/// </summary>
/// <param name="args">
/// Exactly three arguments, in order: the descriptor type name (resolved through
/// <c>MessageUtil.GetDefaultMessage</c>), the path of the input data file, and the
/// path of the output file.
/// </param>
/// <returns>0 on success; 1 on a usage error or any other failure.</returns>
static int Main(string[] args) {
    if (args.Length != 3) {
        // Name each expected argument so the usage line is actually actionable.
        Console.Error.WriteLine("Usage: ProtoMunge <descriptor type name> <input data> <output file>");
        Console.Error.WriteLine("The descriptor type name is the fully-qualified message name, including assembly.");
        Console.Error.WriteLine("(At a future date it may be possible to do this without building the .NET assembly at all.)");
        return 1;
    }

    // Resolve the message type first so that a bad type name yields a short error
    // message rather than the detailed exception dump used further down.
    IMessage defaultMessage;
    try {
        defaultMessage = MessageUtil.GetDefaultMessage(args[0]);
    } catch (ArgumentException e) {
        Console.Error.WriteLine(e.Message);
        return 1;
    }

    try {
        IBuilder builder = defaultMessage.WeakCreateBuilderForType();
        byte[] inputData = File.ReadAllBytes(args[1]);
        builder.WeakMergeFrom(ByteString.CopyFrom(inputData));
        IMessage original = builder.WeakBuild();
        IMessage munged = Munge(original);
        // The munged message must have the same serialized size as the original;
        // refuse to write the output file otherwise.
        if (original.SerializedSize != munged.SerializedSize) {
            throw new Exception("Serialized sizes don't match");
        }
        File.WriteAllBytes(args[2], munged.ToByteArray());
        return 0;
    } catch (Exception e) {
        Console.Error.WriteLine("Error: {0}", e.Message);
        Console.Error.WriteLine();
        Console.Error.WriteLine("Detailed exception information: {0}", e);
        return 1;
    }
}
/// <summary>
/// Munges a message recursively, one field at a time.
/// </summary>
/// <param name="message">The message whose set fields are to be munged.</param>
/// <returns>
/// A new message of the same type as the original message,
/// but munged so that all the data is desensitised.
/// </returns>
private static IMessage Munge(IMessage message) {
    IBuilder mungedBuilder = message.WeakCreateBuilderForType();
    foreach (var entry in message.AllFields) {
        FieldDescriptor field = entry.Key;
        if (!field.IsRepeated) {
            mungedBuilder[field] = CheckedMungeValue(field, entry.Value);
            continue;
        }
        // A repeated field's value is enumerated and every element is munged on its own.
        IEnumerable elements = (IEnumerable) entry.Value;
        foreach (object element in elements) {
            mungedBuilder.WeakAddRepeatedField(field, CheckedMungeValue(field, element));
        }
    }

    IMessage result = mungedBuilder.WeakBuild();
    // A size difference is only reported on standard output here; it does not stop the munge.
    bool sameSize = message.SerializedSize == result.SerializedSize;
    if (!sameSize) {
        Console.WriteLine("Sub message sizes: {0}/{1}", message.SerializedSize, result.SerializedSize);
    }
    return result;
}
/// <summary>
/// Munges a single value and checks that its computed field size ends up the same
/// as it was before.
/// </summary>
/// <param name="fieldDescriptor">Descriptor of the field the value belongs to.</param>
/// <param name="value">The single value to munge.</param>
/// <returns>The munged value.</returns>
/// <exception cref="Exception">
/// The computed field size of the munged value differs from that of the original value.
/// </exception>
private static object CheckedMungeValue(FieldDescriptor fieldDescriptor, object value) {
    FieldType type = fieldDescriptor.FieldType;
    int sizeBefore = CodedOutputStream.ComputeFieldSize(type, fieldDescriptor.FieldNumber, value);
    object replacement = MungeValue(fieldDescriptor, value);
    int sizeAfter = CodedOutputStream.ComputeFieldSize(type, fieldDescriptor.FieldNumber, replacement);
    if (sizeBefore == sizeAfter) {
        return replacement;
    }
    // Exceptions log more easily than assertions
    throw new Exception("Munged value had wrong size. Field type: " + type
        + "; old value: " + value + "; new value: " + replacement);
}
/// <summary>
/// Munges a single value of the specified field descriptor. (i.e. if the field is
/// actually a repeated int, this method receives a single int value to munge, and
/// is called multiple times).
/// </summary>
/// <remarks>
/// Variable-length integer types are replaced by a value whose varint encoding has the
/// same length. SInt32 and SInt64 are ZigZag-encoded on the wire, so their replacement
/// is chosen in ZigZag space; treating them as plain two's-complement varints would
/// change the encoded length (e.g. 1 encodes in one byte, but 100 needs two).
/// Fixed-width types, bools, floats and doubles are replaced by a fresh random value
/// irrespective of the original.
/// </remarks>
/// <param name="fieldDescriptor">Descriptor of the field; its field type selects the strategy.</param>
/// <param name="value">The single value to munge, boxed as the CLR type used for that field type.</param>
/// <returns>The munged value, boxed as the same CLR type as the input.</returns>
/// <exception cref="ArgumentException">The field type is not one of the handled types.</exception>
private static object MungeValue(FieldDescriptor fieldDescriptor, object value) {
    switch (fieldDescriptor.FieldType) {
        case FieldType.SInt64:
            return MungeSInt64((long) value);
        case FieldType.Int64:
            return (long) MungeVarint64((ulong) (long) value);
        case FieldType.UInt64:
            return MungeVarint64((ulong) value);
        case FieldType.SInt32:
            return MungeSInt32((int) value);
        case FieldType.Int32:
            return MungeInt32((int) value);
        case FieldType.UInt32:
            return MungeVarint32((uint) value);
        case FieldType.Double:
            return rng.NextDouble();
        case FieldType.Float:
            return (float) rng.NextDouble();
        case FieldType.Fixed64:
            return BitConverter.ToUInt64(NextRandomBytes(8), 0);
        case FieldType.Fixed32:
            return BitConverter.ToUInt32(NextRandomBytes(4), 0);
        case FieldType.Bool:
            return rng.Next(2) == 1;
        case FieldType.String:
            return MungeString((string) value);
        case FieldType.Group:
        case FieldType.Message:
            return Munge((IMessage) value);
        case FieldType.Bytes:
            return MungeByteString((ByteString) value);
        case FieldType.SFixed64:
            return BitConverter.ToInt64(NextRandomBytes(8), 0);
        case FieldType.SFixed32:
            return BitConverter.ToInt32(NextRandomBytes(4), 0);
        case FieldType.Enum:
            return MungeEnum(fieldDescriptor, (EnumValueDescriptor) value);
        default:
            // TODO(jonskeet): Different exception?
            throw new ArgumentException("Invalid field descriptor");
    }
}

/// <summary>
/// Munges an SInt32 value: ZigZag-encodes it, munges the encoded varint to another of
/// the same length, and ZigZag-decodes the result.
/// </summary>
private static int MungeSInt32(int original) {
    unchecked {
        // ZigZag maps small magnitudes (positive or negative) to small unsigned values.
        uint zigZag = (uint) ((original << 1) ^ (original >> 31));
        uint munged = MungeVarint32(zigZag);
        return (int) (munged >> 1) ^ -(int) (munged & 1);
    }
}

/// <summary>
/// Munges an SInt64 value: ZigZag-encodes it, munges the encoded varint to another of
/// the same length, and ZigZag-decodes the result.
/// </summary>
private static long MungeSInt64(long original) {
    unchecked {
        ulong zigZag = (ulong) ((original << 1) ^ (original >> 63));
        ulong munged = MungeVarint64(zigZag);
        return (long) (munged >> 1) ^ -(long) (munged & 1);
    }
}

/// <summary>
/// Returns a new array of <paramref name="count"/> random bytes.
/// </summary>
private static byte[] NextRandomBytes(int count) {
    byte[] data = new byte[count];
    rng.NextBytes(data);
    return data;
}
/// <summary>
/// Replaces a string with random printable ASCII text (characters 32 to 126) whose
/// UTF-8 encoding has the same number of bytes as the original's.
/// </summary>
/// <remarks>
/// For pure-ASCII input the result has the same number of characters as the original.
/// For non-ASCII input the result has one ASCII character per UTF-8 byte of the original,
/// so the character count may grow while the encoded byte length stays the same.
/// This assumes string fields are serialized as UTF-8, as the protocol buffer wire
/// format specifies.
/// </remarks>
/// <param name="original">The string to replace.</param>
/// <returns>The random replacement string.</returns>
private static object MungeString(string original) {
    // One ASCII character is exactly one UTF-8 byte, so sizing the replacement by the
    // original's UTF-8 byte count preserves the encoded length for any input.
    int byteCount = System.Text.Encoding.UTF8.GetByteCount(original);
    char[] chars = new char[byteCount];
    // Pure ASCII - no control characters.
    for (int i = 0; i < chars.Length; i++) {
        chars[i] = (char) rng.Next(32, 127);
    }
    return new string(chars);
}
/// <summary>
/// Int32 fields are slightly strange - the sign has to be kept the way it is:
/// a negative number can munge to any other negative number (it'll always take
/// 10 bytes), but a non-negative number has to stay non-negative, so the full
/// range of 32 bits cannot be used for it.
/// </summary>
/// <param name="value">The original value.</param>
/// <returns>
/// A random negative value if <paramref name="value"/> is negative; otherwise a random
/// value from the band of non-negative values with the same raw varint size.
/// </returns>
private static int MungeInt32(int value) {
    if (value >= 0) {
        int byteCount = CodedOutputStream.ComputeRawVarint32Size((uint) value);
        // Lowest value of this size band: 0 for one byte, otherwise 2^(7 * (bytes - 1)).
        uint lowest = 0;
        if (byteCount != 1) {
            lowest = 1U << ((byteCount - 1) * 7);
        }
        // Highest value: capped at int.MaxValue for five bytes so the result stays non-negative.
        uint highest = int.MaxValue;
        if (byteCount != 5) {
            highest = (1U << (byteCount * 7)) - 1;
        }
        return (int) NextRandomUInt64(lowest, highest);
    }
    return rng.Next(int.MinValue, 0);
}
/// <summary>
/// Picks a random 32-bit value from the band of values that have the same raw varint
/// size as the original.
/// </summary>
/// <param name="original">The original value.</param>
/// <returns>A random value within the original's size band.</returns>
private static uint MungeVarint32(uint original) {
    int byteCount = CodedOutputStream.ComputeRawVarint32Size(original);
    // Band lower bound: 0 for one byte, otherwise 2^(7 * (bytes - 1)).
    uint lowest = 0;
    if (byteCount != 1) {
        lowest = 1U << ((byteCount - 1) * 7);
    }
    // Band upper bound: the whole 32-bit range for five bytes, otherwise 2^(7 * bytes) - 1.
    uint highest = uint.MaxValue;
    if (byteCount != 5) {
        highest = (1U << (byteCount * 7)) - 1;
    }
    return (uint) NextRandomUInt64(lowest, highest);
}
/// <summary>
/// Picks a random 64-bit value from the band of values that have the same raw varint
/// size as the original.
/// </summary>
/// <param name="original">The original value.</param>
/// <returns>A random value within the original's size band.</returns>
private static ulong MungeVarint64(ulong original) {
    int byteCount = CodedOutputStream.ComputeRawVarint64Size(original);
    // Band lower bound: 0 for one byte, otherwise 2^(7 * (bytes - 1)).
    ulong lowest = 0;
    if (byteCount != 1) {
        lowest = 1UL << ((byteCount - 1) * 7);
    }
    // Band upper bound: the whole 64-bit range for ten bytes, otherwise 2^(7 * bytes) - 1.
    ulong highest = ulong.MaxValue;
    if (byteCount != 10) {
        highest = (1UL << (byteCount * 7)) - 1;
    }
    return NextRandomUInt64(lowest, highest);
}
/// <summary>
/// Returns a random number in the range [min, max] (both inclusive).
/// </summary>
/// <param name="min">Inclusive lower bound.</param>
/// <param name="max">Inclusive upper bound; must not be less than <paramref name="min"/>.</param>
/// <returns>A value v with min &lt;= v &lt;= max.</returns>
/// <exception cref="ArgumentException"><paramref name="min"/> is greater than <paramref name="max"/>.</exception>
private static ulong NextRandomUInt64(ulong min, ulong max) {
    if (min > max) {
        throw new ArgumentException("min must be <= max; min=" + min + "; max = " + max);
    }
    ulong range = max - min;
    // NextDouble() is always strictly less than 1.0, so scaling by range alone and
    // truncating could never produce max. Scale by (range + 1) instead; the addition is
    // done in double so that it cannot wrap when range is ulong.MaxValue.
    // This isn't actually terribly good at very large ranges - but it doesn't really matter for the sake
    // of this program.
    double span = (double) range + 1.0;
    ulong offset = (ulong) (span * rng.NextDouble());
    // Floating-point rounding of very large ranges may overshoot slightly; clamp so the
    // result never exceeds max.
    if (offset > range) {
        offset = range;
    }
    return min + offset;
}
/// <summary>
/// Replaces an enum value with a randomly chosen value of the same enum type whose
/// number has the same raw varint size.
/// </summary>
/// <param name="fieldDescriptor">Descriptor of the enum field; its enum type supplies the candidates.</param>
/// <param name="original">The original enum value.</param>
/// <returns>One of the same-sized candidate <c>EnumValueDescriptor</c>s, picked at random.</returns>
private static object MungeEnum(FieldDescriptor fieldDescriptor, EnumValueDescriptor original) {
    // Find all the values which get encoded to the same size as the current value, and pick one at random
    int originalSize = CodedOutputStream.ComputeRawVarint32Size((uint) original.Number);
    List<EnumValueDescriptor> sameSizeValues = new List<EnumValueDescriptor>();
    foreach (EnumValueDescriptor candidate in fieldDescriptor.EnumType.Values) {
        if (CodedOutputStream.ComputeRawVarint32Size((uint) candidate.Number) == originalSize) {
            sameSizeValues.Add(candidate);
        }
    }
    return sameSizeValues[rng.Next(sameSizeValues.Count)];
}
/// <summary>
/// Replaces a byte string with random bytes of the same length.
/// </summary>
/// <param name="byteString">The original byte string; only its length is used.</param>
/// <returns>A <c>ByteString</c> copied from the freshly generated random bytes.</returns>
private static object MungeByteString(ByteString byteString) {
    int length = byteString.Length;
    byte[] randomBytes = new byte[length];
    rng.NextBytes(randomBytes);
    ByteString replacement = ByteString.CopyFrom(randomBytes);
    return replacement;
}
}
}