diff options
Diffstat (limited to 'src/ProtoMunge')
-rw-r--r-- | src/ProtoMunge/Program.cs | 228 | ||||
-rw-r--r-- | src/ProtoMunge/Properties/AssemblyInfo.cs | 36 | ||||
-rw-r--r-- | src/ProtoMunge/ProtoMunge.csproj | 59 | ||||
-rw-r--r-- | src/ProtoMunge/app.config | 3 |
4 files changed, 326 insertions, 0 deletions
// File: src/ProtoMunge/Program.cs (added by this diff; new file mode 100644, index 00000000..704f5060)
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Reflection;
using Google.ProtocolBuffers.Descriptors;

namespace Google.ProtocolBuffers.ProtoMunge
{
  /// <summary>
  /// Utility console application which takes a message descriptor and a corresponding message,
  /// and produces a new message with similar but random data. The data is the same length
  /// as the original, but with random values within appropriate bands. (For instance, a compressed
  /// integer in the range 0-127 will end up as another integer in the same range, to keep the length
  /// the same.)
  /// TODO(jonskeet): Potentially refactor to use an instance instead, making it simpler to
  /// be thread-safe for external use.
  /// </summary>
  public sealed class Program {

    /// <summary>
    /// Single shared source of randomness for every munging helper in this class.
    /// </summary>
    static readonly Random rng = new Random();

    /// <summary>
    /// Entry point. Expects exactly three arguments: the descriptor type name, the path of
    /// the input data file and the path of the output file.
    /// </summary>
    /// <param name="args">Command line arguments as described above.</param>
    /// <returns>0 when the munged message was written; 1 on a usage error or any failure.</returns>
    static int Main(string[] args) {
      if (args.Length != 3) {
        Console.Error.WriteLine("Usage: ProtoMunge <descriptor type name> <input data> <output file>");
        Console.Error.WriteLine("The descriptor type name is the fully-qualified message name, including assembly.");
        Console.Error.WriteLine("(At a future date it may be possible to do this without building the .NET assembly at all.)");
        return 1;
      }
      IMessage defaultMessage;
      try {
        defaultMessage = MessageUtil.GetDefaultMessage(args[0]);
      } catch (ArgumentException e) {
        Console.Error.WriteLine(e.Message);
        return 1;
      }
      try {
        IBuilder builder = defaultMessage.WeakCreateBuilderForType();
        byte[] inputData = File.ReadAllBytes(args[1]);
        builder.WeakMergeFrom(ByteString.CopyFrom(inputData));
        IMessage original = builder.WeakBuild();
        IMessage munged = Munge(original);
        // The whole point of the tool is that the output has exactly the same encoded length.
        if (original.SerializedSize != munged.SerializedSize) {
          throw new InvalidOperationException("Serialized sizes don't match");
        }
        File.WriteAllBytes(args[2], munged.ToByteArray());
        return 0;
      } catch (Exception e) {
        // Top-level handler of a console tool: report everything and signal failure via the exit code.
        Console.Error.WriteLine("Error: {0}", e.Message);
        Console.Error.WriteLine();
        Console.Error.WriteLine("Detailed exception information: {0}", e);
        return 1;
      }
    }

    /// <summary>
    /// Munges a message recursively.
    /// </summary>
    /// <param name="message">The message whose set fields are to be replaced with random data.</param>
    /// <returns>A new message of the same type as the original message,
    /// but munged so that all the data is desensitised.</returns>
    private static IMessage Munge(IMessage message) {
      IBuilder builder = message.WeakCreateBuilderForType();
      foreach (var pair in message.AllFields) {
        if (pair.Key.IsRepeated) {
          // Repeated fields are munged element by element so each element keeps its own size.
          foreach (object singleValue in (IEnumerable) pair.Value) {
            builder.WeakAddRepeatedField(pair.Key, CheckedMungeValue(pair.Key, singleValue));
          }
        } else {
          builder[pair.Key] = CheckedMungeValue(pair.Key, pair.Value);
        }
      }
      IMessage munged = builder.WeakBuild();
      // Diagnostic only: a mismatch here is reported but not treated as fatal at this level.
      if (message.SerializedSize != munged.SerializedSize) {
        Console.WriteLine("Sub message sizes: {0}/{1}", message.SerializedSize, munged.SerializedSize);
      }
      return munged;
    }

    /// <summary>
    /// Munges a single value and checks that the length ends up the same as it was before.
    /// </summary>
    /// <param name="fieldDescriptor">Descriptor of the field the value belongs to.</param>
    /// <param name="value">A single (non-repeated) value of that field.</param>
    /// <returns>The munged replacement value.</returns>
    /// <exception cref="InvalidOperationException">The munged value would be encoded
    /// with a different size from the original value.</exception>
    private static object CheckedMungeValue(FieldDescriptor fieldDescriptor, object value) {
      int currentSize = CodedOutputStream.ComputeFieldSize(fieldDescriptor.FieldType, fieldDescriptor.FieldNumber, value);
      object mungedValue = MungeValue(fieldDescriptor, value);
      int mungedSize = CodedOutputStream.ComputeFieldSize(fieldDescriptor.FieldType, fieldDescriptor.FieldNumber, mungedValue);
      // Exceptions log more easily than assertions
      if (currentSize != mungedSize) {
        throw new InvalidOperationException("Munged value had wrong size. Field type: " + fieldDescriptor.FieldType
            + "; old value: " + value + "; new value: " + mungedValue);
      }
      return mungedValue;
    }

    /// <summary>
    /// Munges a single value of the specified field descriptor. (i.e. if the field is
    /// actually a repeated int, this method receives a single int value to munge, and
    /// is called multiple times).
    /// </summary>
    /// <param name="fieldDescriptor">Descriptor of the field the value belongs to.</param>
    /// <param name="value">The boxed value to replace.</param>
    /// <returns>A boxed random value of the same field type.</returns>
    /// <exception cref="ArgumentException">The field type is not one of the handled types.</exception>
    private static object MungeValue(FieldDescriptor fieldDescriptor, object value) {
      switch (fieldDescriptor.FieldType) {
        case FieldType.SInt64:
          // sint64 is ZigZag-encoded on the wire, so the size band has to be chosen on the
          // encoded value rather than on the raw two's complement bits.
          return DecodeZigZag64(MungeVarint64(EncodeZigZag64((long) value)));
        case FieldType.Int64:
          return (long) MungeVarint64((ulong) (long) value);
        case FieldType.UInt64:
          return MungeVarint64((ulong) value);
        case FieldType.SInt32:
          // Same reasoning as for SInt64: munge the ZigZag-encoded form.
          return DecodeZigZag32(MungeVarint32(EncodeZigZag32((int) value)));
        case FieldType.Int32:
          return MungeInt32((int) value);
        case FieldType.UInt32:
          return MungeVarint32((uint) value);
        case FieldType.Double:
          return rng.NextDouble();
        case FieldType.Float:
          return (float) rng.NextDouble();
        case FieldType.Fixed64:
          return BitConverter.ToUInt64(NextRandomBytes(8), 0);
        case FieldType.Fixed32:
          return BitConverter.ToUInt32(NextRandomBytes(4), 0);
        case FieldType.Bool:
          return rng.Next(2) == 1;
        case FieldType.String:
          return MungeString((string) value);
        case FieldType.Group:
        case FieldType.Message:
          return Munge((IMessage) value);
        case FieldType.Bytes:
          return MungeByteString((ByteString) value);
        case FieldType.SFixed64:
          return BitConverter.ToInt64(NextRandomBytes(8), 0);
        case FieldType.SFixed32:
          return BitConverter.ToInt32(NextRandomBytes(4), 0);
        case FieldType.Enum:
          // NOTE(review): this unboxes the value as an int; confirm that the reflection API
          // really hands enum values over as boxed ints rather than as descriptors.
          return MungeEnum(fieldDescriptor, (int) value);
        default:
          // TODO(jonskeet): Different exception?
          throw new ArgumentException("Invalid field descriptor");
      }
    }

    /// <summary>
    /// Returns a freshly allocated array of the given length filled with random bytes.
    /// </summary>
    private static byte[] NextRandomBytes(int count) {
      byte[] data = new byte[count];
      rng.NextBytes(data);
      return data;
    }

    /// <summary>
    /// ZigZag-encodes a signed 32-bit value (0, -1, 1, -2, ... map to 0, 1, 2, 3, ...).
    /// </summary>
    private static uint EncodeZigZag32(int n) {
      unchecked {
        return (uint) ((n << 1) ^ (n >> 31));
      }
    }

    /// <summary>
    /// Reverses <see cref="EncodeZigZag32"/>.
    /// </summary>
    private static int DecodeZigZag32(uint n) {
      unchecked {
        return (int) (n >> 1) ^ -(int) (n & 1);
      }
    }

    /// <summary>
    /// ZigZag-encodes a signed 64-bit value (0, -1, 1, -2, ... map to 0, 1, 2, 3, ...).
    /// </summary>
    private static ulong EncodeZigZag64(long n) {
      unchecked {
        return (ulong) ((n << 1) ^ (n >> 63));
      }
    }

    /// <summary>
    /// Reverses <see cref="EncodeZigZag64"/>.
    /// </summary>
    private static long DecodeZigZag64(ulong n) {
      unchecked {
        return (long) (n >> 1) ^ -(long) (n & 1);
      }
    }

    /// <summary>
    /// Replaces an ASCII string with a random string of printable ASCII characters
    /// of the same length.
    /// </summary>
    /// <exception cref="ArgumentException">The original contains a character above 127.</exception>
    private static object MungeString(string original) {
      foreach (char c in original) {
        if (c > 127) {
          throw new ArgumentException("Can't handle non-ascii yet");
        }
      }
      char[] chars = new char[original.Length];
      // Convert to pure ASCII - no control characters.
      for (int i = 0; i < chars.Length; i++) {
        chars[i] = (char) rng.Next(32, 127);
      }
      return new string(chars);
    }

    /// <summary>
    /// Int32 fields are slightly strange - we need to keep the sign the same way it is:
    /// negative numbers can munge to any other negative number (it'll always take
    /// 10 bytes) but positive numbers have to stay positive, so we can't use the
    /// full range of 32 bits.
    /// </summary>
    private static int MungeInt32(int value) {
      if (value < 0) {
        return rng.Next(int.MinValue, 0);
      }
      int length = CodedOutputStream.ComputeRawVarint32Size((uint) value);
      uint min = length == 1 ? 0 : 1U << ((length - 1) * 7);
      uint max = length == 5 ? int.MaxValue : (1U << (length * 7)) - 1;
      return (int) NextRandomUInt64(min, max);
    }

    /// <summary>
    /// Returns a random value whose raw varint encoding has the same number of bytes
    /// as that of <paramref name="original"/>.
    /// </summary>
    private static uint MungeVarint32(uint original) {
      int length = CodedOutputStream.ComputeRawVarint32Size(original);
      uint min = length == 1 ? 0 : 1U << ((length - 1) * 7);
      uint max = length == 5 ? uint.MaxValue : (1U << (length * 7)) - 1;
      return (uint) NextRandomUInt64(min, max);
    }

    /// <summary>
    /// Returns a random value whose raw varint encoding has the same number of bytes
    /// as that of <paramref name="original"/>.
    /// </summary>
    private static ulong MungeVarint64(ulong original) {
      int length = CodedOutputStream.ComputeRawVarint64Size(original);
      ulong min = length == 1 ? 0 : 1UL << ((length - 1) * 7);
      ulong max = length == 10 ? ulong.MaxValue : (1UL << (length * 7)) - 1;
      return NextRandomUInt64(min, max);
    }

    /// <summary>
    /// Returns a random number in the range [min, max] (both inclusive).
    /// </summary>
    /// <exception cref="ArgumentException"><paramref name="min"/> is greater than
    /// <paramref name="max"/>.</exception>
    private static ulong NextRandomUInt64(ulong min, ulong max) {
      if (min > max) {
        throw new ArgumentException("min must be <= max; min=" + min + "; max = " + max);
      }
      ulong range = max - min;
      // This isn't actually terribly good at very large ranges - but it doesn't really matter for the sake
      // of this program.
      // Scale by (range + 1) so that truncation can yield every offset up to and including
      // range; scaling by range alone would never produce max.
      ulong offset = (ulong) ((range + 1.0) * rng.NextDouble());
      // Floating point rounding on very large ranges could overshoot by one step; clamp so
      // that the result stays within [min, max] and the addition cannot wrap around.
      if (offset > range) {
        offset = range;
      }
      return min + offset;
    }

    /// <summary>
    /// Picks, at random, the number of one of the enum values which is encoded to the same
    /// size as the current value.
    /// </summary>
    /// <param name="fieldDescriptor">Descriptor of the enum field; its EnumType supplies the candidates.</param>
    /// <param name="original">The number of the current enum value.</param>
    private static object MungeEnum(FieldDescriptor fieldDescriptor, int original) {
      // Find all the values which get encoded to the same size as the current value, and pick one at random
      int originalSize = CodedOutputStream.ComputeRawVarint32Size((uint) original);
      List<EnumValueDescriptor> sameSizeValues = new List<EnumValueDescriptor>();
      foreach (EnumValueDescriptor candidate in fieldDescriptor.EnumType.Values) {
        if (CodedOutputStream.ComputeRawVarint32Size((uint) candidate.Number) == originalSize) {
          sameSizeValues.Add(candidate);
        }
      }
      return sameSizeValues[rng.Next(sameSizeValues.Count)].Number;
    }

    /// <summary>
    /// Returns a byte string of random bytes with the same length as the given one.
    /// </summary>
    private static object MungeByteString(ByteString byteString) {
      return ByteString.CopyFrom(NextRandomBytes(byteString.Length));
    }
  }
}
\ No newline at end of file diff --git a/src/ProtoMunge/Properties/AssemblyInfo.cs b/src/ProtoMunge/Properties/AssemblyInfo.cs new file mode 100644 index 00000000..179b04be --- /dev/null +++ b/src/ProtoMunge/Properties/AssemblyInfo.cs @@ -0,0 +1,36 @@ +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +// General Information about an assembly is controlled through the following +// set of attributes. Change these attribute values to modify the information +// associated with an assembly. +[assembly: AssemblyTitle("ProtoMunge")] +[assembly: AssemblyDescription("")] +[assembly: AssemblyConfiguration("")] +[assembly: AssemblyCompany("")] +[assembly: AssemblyProduct("ProtoMunge")] +[assembly: AssemblyCopyright("Copyright © 2008")] +[assembly: AssemblyTrademark("")] +[assembly: AssemblyCulture("")] + +// Setting ComVisible to false makes the types in this assembly not visible +// to COM components. If you need to access a type in this assembly from +// COM, set the ComVisible attribute to true on that type. 
+[assembly: ComVisible(false)] + +// The following GUID is for the ID of the typelib if this project is exposed to COM +[assembly: Guid("4d26ed0e-a6ca-4df9-bb87-59429d49b676")] + +// Version information for an assembly consists of the following four values: +// +// Major Version +// Minor Version +// Build Number +// Revision +// +// You can specify all the values or you can default the Build and Revision Numbers +// by using the '*' as shown below: +// [assembly: AssemblyVersion("1.0.*")] +[assembly: AssemblyVersion("1.0.0.0")] +[assembly: AssemblyFileVersion("1.0.0.0")] diff --git a/src/ProtoMunge/ProtoMunge.csproj b/src/ProtoMunge/ProtoMunge.csproj new file mode 100644 index 00000000..f8a9af9a --- /dev/null +++ b/src/ProtoMunge/ProtoMunge.csproj @@ -0,0 +1,59 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="3.5" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <PropertyGroup> + <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration> + <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform> + <ProductVersion>9.0.30729</ProductVersion> + <SchemaVersion>2.0</SchemaVersion> + <ProjectGuid>{8F09AF72-3327-4FA7-BC09-070B80221AB9}</ProjectGuid> + <OutputType>Exe</OutputType> + <AppDesignerFolder>Properties</AppDesignerFolder> + <RootNamespace>Google.ProtocolBuffers.ProtoMunge</RootNamespace> + <AssemblyName>ProtoMunge</AssemblyName> + <TargetFrameworkVersion>v2.0</TargetFrameworkVersion> + <FileAlignment>512</FileAlignment> + <TargetFrameworkSubset> + </TargetFrameworkSubset> + </PropertyGroup> + <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "> + <DebugSymbols>true</DebugSymbols> + <DebugType>full</DebugType> + <Optimize>false</Optimize> + <OutputPath>bin\Debug\</OutputPath> + <DefineConstants>DEBUG;TRACE</DefineConstants> + <ErrorReport>prompt</ErrorReport> + <WarningLevel>4</WarningLevel> + </PropertyGroup> + <PropertyGroup Condition=" 
'$(Configuration)|$(Platform)' == 'Release|AnyCPU' "> + <DebugType>pdbonly</DebugType> + <Optimize>true</Optimize> + <OutputPath>bin\Release\</OutputPath> + <DefineConstants>TRACE</DefineConstants> + <ErrorReport>prompt</ErrorReport> + <WarningLevel>4</WarningLevel> + </PropertyGroup> + <ItemGroup> + <Reference Include="System" /> + </ItemGroup> + <ItemGroup> + <Compile Include="Program.cs" /> + <Compile Include="Properties\AssemblyInfo.cs" /> + </ItemGroup> + <ItemGroup> + <ProjectReference Include="..\ProtocolBuffers\ProtocolBuffers.csproj"> + <Project>{6908BDCE-D925-43F3-94AC-A531E6DF2591}</Project> + <Name>ProtocolBuffers</Name> + </ProjectReference> + </ItemGroup> + <ItemGroup> + <None Include="app.config" /> + </ItemGroup> + <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" /> + <!-- To modify your build process, add your task inside one of the targets below and uncomment it. + Other similar extension points exist, see Microsoft.Common.targets. + <Target Name="BeforeBuild"> + </Target> + <Target Name="AfterBuild"> + </Target> + --> +</Project>
\ No newline at end of file diff --git a/src/ProtoMunge/app.config b/src/ProtoMunge/app.config new file mode 100644 index 00000000..df20690a --- /dev/null +++ b/src/ProtoMunge/app.config @@ -0,0 +1,3 @@ +<?xml version="1.0"?> +<configuration> +<startup><supportedRuntime version="v2.0.50727"/></startup></configuration> |