aboutsummaryrefslogtreecommitdiff
path: root/src/ProtoMunge
diff options
context:
space:
mode:
authorJon Skeet <skeet@pobox.com>2009-01-16 10:57:40 +0000
committerJon Skeet <skeet@pobox.com>2009-01-16 10:57:40 +0000
commit8f8186a30b9b8d865c6211eb91a4df8aae1f40d5 (patch)
tree8448e4c52607cd39f2035a06196cfe4247942f06 /src/ProtoMunge
parent0c89aa1fdba0158d8dee3aef9cfe815afe4cc494 (diff)
downloadprotobuf-8f8186a30b9b8d865c6211eb91a4df8aae1f40d5.tar.gz
protobuf-8f8186a30b9b8d865c6211eb91a4df8aae1f40d5.tar.bz2
protobuf-8f8186a30b9b8d865c6211eb91a4df8aae1f40d5.zip
Benchmarking, dumping and munging
Diffstat (limited to 'src/ProtoMunge')
-rw-r--r--src/ProtoMunge/Program.cs228
-rw-r--r--src/ProtoMunge/Properties/AssemblyInfo.cs36
-rw-r--r--src/ProtoMunge/ProtoMunge.csproj59
-rw-r--r--src/ProtoMunge/app.config3
4 files changed, 326 insertions, 0 deletions
diff --git a/src/ProtoMunge/Program.cs b/src/ProtoMunge/Program.cs
new file mode 100644
index 00000000..704f5060
--- /dev/null
+++ b/src/ProtoMunge/Program.cs
@@ -0,0 +1,228 @@
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.IO;
+using System.Reflection;
+using Google.ProtocolBuffers.Descriptors;
+
+namespace Google.ProtocolBuffers.ProtoMunge
+{
+ /// <summary>
+ /// Utility console application which takes a message descriptor and a corresponding message,
+ /// and produces a new message with similar but random data. The data is the same length
+ /// as the original, but with random values within appropriate bands. (For instance, a compressed
+ /// integer in the range 0-127 will end up as another integer in the same range, to keep the length
+ /// the same.)
+ /// TODO(jonskeet): Potentially refactor to use an instance instead, making it simpler to
+ /// be thread-safe for external use.
+ /// </summary>
+ public sealed class Program {
+
+ static readonly Random rng = new Random();
+
+ /// <summary>
+ /// Entry point: reads a serialized message from a file, munges it and writes the
+ /// munged message to another file.
+ /// </summary>
+ /// <param name="args">Exactly three values: the descriptor type name (passed to
+ /// MessageUtil.GetDefaultMessage), the input data path and the output file path.</param>
+ /// <returns>0 on success; 1 on a usage error or any failure.</returns>
+ static int Main(string[] args) {
+   if (args.Length != 3) {
+     Console.Error.WriteLine("Usage: ProtoMunge <descriptor type name> <input data> <output file>");
+     Console.Error.WriteLine("The descriptor type name is the fully-qualified message name, including assembly.");
+     Console.Error.WriteLine("(At a future date it may be possible to do this without building the .NET assembly at all.)");
+     return 1;
+   }
+   IMessage defaultMessage;
+   try {
+     defaultMessage = MessageUtil.GetDefaultMessage(args[0]);
+   } catch (ArgumentException e) {
+     // Only the message is printed here; no stack trace for a bad type name.
+     Console.Error.WriteLine(e.Message);
+     return 1;
+   }
+   try {
+     IBuilder builder = defaultMessage.WeakCreateBuilderForType();
+     byte[] inputData = File.ReadAllBytes(args[1]);
+     builder.WeakMergeFrom(ByteString.CopyFrom(inputData));
+     IMessage original = builder.WeakBuild();
+     IMessage munged = Munge(original);
+     // The munged message must serialize to exactly the same length as the input message.
+     if (original.SerializedSize != munged.SerializedSize) {
+       throw new InvalidOperationException("Serialized sizes don't match");
+     }
+     File.WriteAllBytes(args[2], munged.ToByteArray());
+     return 0;
+   } catch (Exception e) {
+     // Top-level handler: report every failure on stderr and signal it via the exit code.
+     Console.Error.WriteLine("Error: {0}", e.Message);
+     Console.Error.WriteLine();
+     Console.Error.WriteLine("Detailed exception information: {0}", e);
+     return 1;
+   }
+ }
+
+ /// <summary>
+ /// Builds a munged copy of a message, recursing into nested messages via
+ /// <c>CheckedMungeValue</c>.
+ /// </summary>
+ /// <param name="message">The message whose fields (as reported by AllFields) are munged.</param>
+ /// <returns>A new message created from the original's builder type, with every
+ /// field value replaced by a munged value.</returns>
+ private static IMessage Munge(IMessage message) {
+   IBuilder copy = message.WeakCreateBuilderForType();
+   foreach (var entry in message.AllFields) {
+     if (!entry.Key.IsRepeated) {
+       // Singular field: munge the one value and assign it directly.
+       copy[entry.Key] = CheckedMungeValue(entry.Key, entry.Value);
+       continue;
+     }
+     // Repeated field: munge each element separately and append it.
+     IEnumerable elements = (IEnumerable) entry.Value;
+     foreach (object element in elements) {
+       copy.WeakAddRepeatedField(entry.Key, CheckedMungeValue(entry.Key, element));
+     }
+   }
+   IMessage result = copy.WeakBuild();
+   // A size difference is only reported here, not treated as fatal.
+   if (message.SerializedSize != result.SerializedSize) {
+     Console.WriteLine("Sub message sizes: {0}/{1}", message.SerializedSize, result.SerializedSize);
+   }
+   return result;
+ }
+
+ /// <summary>
+ /// Munges a single value and checks that its computed field size is the same as before.
+ /// </summary>
+ /// <param name="fieldDescriptor">Descriptor of the field the value belongs to.</param>
+ /// <param name="value">A single value of that field (one element, for repeated fields).</param>
+ /// <returns>The munged value.</returns>
+ /// <exception cref="InvalidOperationException">The munged value's computed field size
+ /// differs from the original value's.</exception>
+ private static object CheckedMungeValue(FieldDescriptor fieldDescriptor, object value) {
+   int currentSize = CodedOutputStream.ComputeFieldSize(fieldDescriptor.FieldType, fieldDescriptor.FieldNumber, value);
+   object mungedValue = MungeValue(fieldDescriptor, value);
+   int mungedSize = CodedOutputStream.ComputeFieldSize(fieldDescriptor.FieldType, fieldDescriptor.FieldNumber, mungedValue);
+   // Exceptions log more easily than assertions. A specific exception type is thrown
+   // rather than the bare System.Exception base class.
+   if (currentSize != mungedSize) {
+     throw new InvalidOperationException("Munged value had wrong size. Field type: " + fieldDescriptor.FieldType
+         + "; old value: " + value + "; new value: " + mungedValue);
+   }
+   return mungedValue;
+ }
+
+ /// <summary>
+ /// Munges a single value of the specified field descriptor. (i.e. if the field is
+ /// actually a repeated int, this method receives a single int value to munge, and
+ /// is called multiple times).
+ /// </summary>
+ /// <param name="fieldDescriptor">Descriptor whose FieldType selects the munging strategy.</param>
+ /// <param name="value">The boxed value to munge; its runtime type must match the field type.</param>
+ /// <returns>A boxed random value of the same runtime type as the original.</returns>
+ /// <exception cref="ArgumentException">The field type is not one of the handled types.</exception>
+ private static object MungeValue(FieldDescriptor fieldDescriptor, object value) {
+   switch (fieldDescriptor.FieldType) {
+     case FieldType.SInt64:
+       // sint64 is ZigZag-encoded on the wire, so the length band has to be picked
+       // from the encoded form rather than from the raw two's-complement bits.
+       return MungeSInt64((long) value);
+     case FieldType.Int64:
+       return (long) MungeVarint64((ulong) (long) value);
+     case FieldType.UInt64:
+       return MungeVarint64((ulong) value);
+     case FieldType.SInt32:
+       // Same reasoning as SInt64: munge the ZigZag-encoded form.
+       return MungeSInt32((int) value);
+     case FieldType.Int32:
+       return MungeInt32((int) value);
+     case FieldType.UInt32:
+       return MungeVarint32((uint) value);
+     case FieldType.Double:
+       return rng.NextDouble();
+     case FieldType.Float:
+       return (float) rng.NextDouble();
+     case FieldType.Fixed64: {
+       byte[] data = new byte[8];
+       rng.NextBytes(data);
+       return BitConverter.ToUInt64(data, 0);
+     }
+     case FieldType.Fixed32: {
+       byte[] data = new byte[4];
+       rng.NextBytes(data);
+       return BitConverter.ToUInt32(data, 0);
+     }
+     case FieldType.Bool:
+       return rng.Next(2) == 1;
+     case FieldType.String:
+       return MungeString((string) value);
+     case FieldType.Group:
+     case FieldType.Message:
+       return Munge((IMessage) value);
+     case FieldType.Bytes:
+       return MungeByteString((ByteString) value);
+     case FieldType.SFixed64: {
+       byte[] data = new byte[8];
+       rng.NextBytes(data);
+       return BitConverter.ToInt64(data, 0);
+     }
+     case FieldType.SFixed32: {
+       byte[] data = new byte[4];
+       rng.NextBytes(data);
+       return BitConverter.ToInt32(data, 0);
+     }
+     case FieldType.Enum:
+       return MungeEnum(fieldDescriptor, (int) value);
+     default:
+       // TODO(jonskeet): Different exception?
+       throw new ArgumentException("Invalid field descriptor");
+   }
+ }
+
+ /// <summary>
+ /// Munges an sint32 value: ZigZag-encodes it, picks a random value in the same
+ /// varint length band, and decodes the result back to a signed integer.
+ /// </summary>
+ private static int MungeSInt32(int value) {
+   uint encoded = (uint) ((value << 1) ^ (value >> 31));
+   uint munged = MungeVarint32(encoded);
+   return (int) (munged >> 1) ^ -(int) (munged & 1);
+ }
+
+ /// <summary>
+ /// Munges an sint64 value: ZigZag-encodes it, picks a random value in the same
+ /// varint length band, and decodes the result back to a signed integer.
+ /// </summary>
+ private static long MungeSInt64(long value) {
+   ulong encoded = (ulong) ((value << 1) ^ (value >> 63));
+   ulong munged = MungeVarint64(encoded);
+   return (long) (munged >> 1) ^ -(long) (munged & 1);
+ }
+
+ /// <summary>
+ /// Replaces an ASCII string with a random string of the same length, made of
+ /// characters in the range 32-126.
+ /// </summary>
+ /// <param name="original">The string to replace; every character must be at most 127.</param>
+ /// <returns>The random replacement string.</returns>
+ /// <exception cref="ArgumentException">The original contains a character above 127.</exception>
+ private static object MungeString(string original) {
+   // Validate the whole input first, before any random data is drawn.
+   for (int position = 0; position < original.Length; position++) {
+     if (original[position] >= 128) {
+       throw new ArgumentException("Can't handle non-ascii yet");
+     }
+   }
+   // Random.Next(32, 127) has an exclusive upper bound, so this yields 32..126:
+   // pure ASCII with no control characters.
+   char[] replacement = new char[original.Length];
+   int index = 0;
+   while (index < replacement.Length) {
+     replacement[index] = (char) rng.Next(32, 127);
+     index++;
+   }
+   return new string(replacement);
+ }
+
+ /// <summary>
+ /// Munges an int32 value while preserving its sign. A negative value may become any
+ /// other negative value (per the original author's note, it always takes 10 bytes);
+ /// a non-negative value stays non-negative and within the same varint length band,
+ /// so the full 32-bit range cannot be used.
+ /// </summary>
+ /// <param name="value">The value to munge.</param>
+ /// <returns>A random value with the same sign class and varint length band.</returns>
+ private static int MungeInt32(int value) {
+   if (value >= 0) {
+     int encodedLength = CodedOutputStream.ComputeRawVarint32Size((uint) value);
+     // Smallest value needing this many bytes (0 for the one-byte band).
+     uint lowest = 0;
+     if (encodedLength != 1) {
+       lowest = 1U << ((encodedLength - 1) * 7);
+     }
+     // Largest value for this band; the five-byte band is capped to keep the sign bit clear.
+     uint highest = int.MaxValue;
+     if (encodedLength != 5) {
+       highest = (1U << (encodedLength * 7)) - 1;
+     }
+     return (int) NextRandomUInt64(lowest, highest);
+   }
+   // Random.Next's upper bound is exclusive, so the result is always negative.
+   return rng.Next(int.MinValue, 0);
+ }
+
+ /// <summary>
+ /// Picks a random 32-bit value from the same varint length band as the original.
+ /// </summary>
+ /// <param name="original">The value whose raw varint size selects the band.</param>
+ /// <returns>A random value between the band's smallest and largest values.</returns>
+ private static uint MungeVarint32(uint original) {
+   int encodedLength = CodedOutputStream.ComputeRawVarint32Size(original);
+   // Each varint byte carries 7 payload bits; the one-byte band starts at zero.
+   uint lowest = 0;
+   if (encodedLength != 1) {
+     lowest = 1U << ((encodedLength - 1) * 7);
+   }
+   // The five-byte band extends to the top of the 32-bit range.
+   uint highest = uint.MaxValue;
+   if (encodedLength != 5) {
+     highest = (1U << (encodedLength * 7)) - 1;
+   }
+   return (uint) NextRandomUInt64(lowest, highest);
+ }
+
+ /// <summary>
+ /// Picks a random 64-bit value from the same varint length band as the original.
+ /// </summary>
+ /// <param name="original">The value whose raw varint size selects the band.</param>
+ /// <returns>A random value between the band's smallest and largest values.</returns>
+ private static ulong MungeVarint64(ulong original) {
+   int encodedLength = CodedOutputStream.ComputeRawVarint64Size(original);
+   // Each varint byte carries 7 payload bits; the one-byte band starts at zero.
+   ulong lowest = 0;
+   if (encodedLength != 1) {
+     lowest = 1UL << ((encodedLength - 1) * 7);
+   }
+   // The ten-byte band extends to the top of the 64-bit range.
+   ulong highest = ulong.MaxValue;
+   if (encodedLength != 10) {
+     highest = (1UL << (encodedLength * 7)) - 1;
+   }
+   return NextRandomUInt64(lowest, highest);
+ }
+
+ /// <summary>
+ /// Returns a random number in the range [min, max] (both inclusive).
+ /// </summary>
+ /// <param name="min">Smallest value that may be returned.</param>
+ /// <param name="max">Largest value that may be returned.</param>
+ /// <returns>A value v with min &lt;= v &lt;= max.</returns>
+ /// <exception cref="ArgumentException">min is greater than max.</exception>
+ private static ulong NextRandomUInt64(ulong min, ulong max) {
+   if (min > max) {
+     throw new ArgumentException("min must be <= max; min=" + min + "; max = " + max);
+   }
+   ulong range = max - min;
+   // NextDouble() is in [0, 1), so scaling by (range + 1) is what makes max reachable;
+   // scaling by range alone truncates to at most range - 1 for ranges a double holds exactly.
+   // This isn't actually terribly good at very large ranges - but it doesn't really matter for the sake
+   // of this program.
+   double scaled = ((double) range + 1.0) * rng.NextDouble();
+   // Clamp in the double domain: (double) range may have rounded up past range, and a
+   // double at or above 2^64 cannot be converted to ulong reliably. Any double strictly
+   // below (double) range is <= range, so the conversion in the else branch is safe.
+   ulong offset = scaled >= (double) range ? range : (ulong) scaled;
+   return min + offset;
+ }
+
+ /// <summary>
+ /// Replaces an enum value with a randomly chosen value of the same enum type whose
+ /// number has the same raw varint size as the original.
+ /// </summary>
+ /// <param name="fieldDescriptor">Descriptor of the enum field; its EnumType supplies the candidates.</param>
+ /// <param name="original">The numeric value currently stored in the field.</param>
+ /// <returns>The boxed number of the selected enum value.</returns>
+ private static object MungeEnum(FieldDescriptor fieldDescriptor, int original) {
+   int targetSize = CodedOutputStream.ComputeRawVarint32Size((uint) original);
+   // Collect every declared value that encodes to the same number of bytes.
+   List<EnumValueDescriptor> matches = new List<EnumValueDescriptor>();
+   foreach (EnumValueDescriptor option in fieldDescriptor.EnumType.Values) {
+     int optionSize = CodedOutputStream.ComputeRawVarint32Size((uint) option.Number);
+     if (optionSize != targetSize) {
+       continue;
+     }
+     matches.Add(option);
+   }
+   // Pick one of the matches at random.
+   int chosenIndex = rng.Next(matches.Count);
+   EnumValueDescriptor chosen = matches[chosenIndex];
+   return chosen.Number;
+ }
+
+ /// <summary>
+ /// Replaces a byte string with random bytes of the same length.
+ /// </summary>
+ /// <param name="byteString">The byte string whose Length determines the output length.</param>
+ /// <returns>A new ByteString copied from the random bytes.</returns>
+ private static object MungeByteString(ByteString byteString) {
+   int byteCount = byteString.Length;
+   byte[] randomBytes = new byte[byteCount];
+   rng.NextBytes(randomBytes);
+   object replacement = ByteString.CopyFrom(randomBytes);
+   return replacement;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ProtoMunge/Properties/AssemblyInfo.cs b/src/ProtoMunge/Properties/AssemblyInfo.cs
new file mode 100644
index 00000000..179b04be
--- /dev/null
+++ b/src/ProtoMunge/Properties/AssemblyInfo.cs
@@ -0,0 +1,36 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("ProtoMunge")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("")]
+[assembly: AssemblyProduct("ProtoMunge")]
+[assembly: AssemblyCopyright("Copyright © 2008")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible
+// to COM components. If you need to access a type in this assembly from
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("4d26ed0e-a6ca-4df9-bb87-59429d49b676")]
+
+// Version information for an assembly consists of the following four values:
+//
+// Major Version
+// Minor Version
+// Build Number
+// Revision
+//
+// You can specify all the values or you can default the Build and Revision Numbers
+// by using the '*' as shown below:
+// [assembly: AssemblyVersion("1.0.*")]
+[assembly: AssemblyVersion("1.0.0.0")]
+[assembly: AssemblyFileVersion("1.0.0.0")]
diff --git a/src/ProtoMunge/ProtoMunge.csproj b/src/ProtoMunge/ProtoMunge.csproj
new file mode 100644
index 00000000..f8a9af9a
--- /dev/null
+++ b/src/ProtoMunge/ProtoMunge.csproj
@@ -0,0 +1,59 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="3.5" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <ProductVersion>9.0.30729</ProductVersion>
+ <SchemaVersion>2.0</SchemaVersion>
+ <ProjectGuid>{8F09AF72-3327-4FA7-BC09-070B80221AB9}</ProjectGuid>
+ <OutputType>Exe</OutputType>
+ <AppDesignerFolder>Properties</AppDesignerFolder>
+ <RootNamespace>Google.ProtocolBuffers.ProtoMunge</RootNamespace>
+ <AssemblyName>ProtoMunge</AssemblyName>
+ <TargetFrameworkVersion>v2.0</TargetFrameworkVersion>
+ <FileAlignment>512</FileAlignment>
+ <TargetFrameworkSubset>
+ </TargetFrameworkSubset>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+ <DebugSymbols>true</DebugSymbols>
+ <DebugType>full</DebugType>
+ <Optimize>false</Optimize>
+ <OutputPath>bin\Debug\</OutputPath>
+ <DefineConstants>DEBUG;TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+ <DebugType>pdbonly</DebugType>
+ <Optimize>true</Optimize>
+ <OutputPath>bin\Release\</OutputPath>
+ <DefineConstants>TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <ItemGroup>
+ <Reference Include="System" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="Program.cs" />
+ <Compile Include="Properties\AssemblyInfo.cs" />
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\ProtocolBuffers\ProtocolBuffers.csproj">
+ <Project>{6908BDCE-D925-43F3-94AC-A531E6DF2591}</Project>
+ <Name>ProtocolBuffers</Name>
+ </ProjectReference>
+ </ItemGroup>
+ <ItemGroup>
+ <None Include="app.config" />
+ </ItemGroup>
+ <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+ <!-- To modify your build process, add your task inside one of the targets below and uncomment it.
+ Other similar extension points exist, see Microsoft.Common.targets.
+ <Target Name="BeforeBuild">
+ </Target>
+ <Target Name="AfterBuild">
+ </Target>
+ -->
+</Project> \ No newline at end of file
diff --git a/src/ProtoMunge/app.config b/src/ProtoMunge/app.config
new file mode 100644
index 00000000..df20690a
--- /dev/null
+++ b/src/ProtoMunge/app.config
@@ -0,0 +1,3 @@
+<?xml version="1.0"?>
+<configuration>
+<startup><supportedRuntime version="v2.0.50727"/></startup></configuration>