From 2e83110230b7e91b07835e9c718a1d6fbcb8b617 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Wed, 27 Apr 2016 18:22:22 -0700 Subject: Added framework for generating/consuming benchmarking data sets. This takes the code that was sitting in benchmarks/ already and makes it easier for language-specific benchmarks to consume. Future PRs will enhance this so that the language-specific benchmarks can report metrics back that will be tracked over time in PerfKit. --- Makefile.am | 6 +- benchmarks/Makefile.am | 75 +++++++++++++++ benchmarks/benchmark_messages_proto2.proto | 141 +++++++++++++++++++++++++++++ benchmarks/benchmark_messages_proto3.proto | 76 ++++++++++++++++ benchmarks/benchmarks.proto | 102 +++++++++++++++++++++ benchmarks/generate_datasets.cc | 114 +++++++++++++++++++++++ benchmarks/google_speed.proto | 138 ---------------------------- configure.ac | 2 +- 8 files changed, 514 insertions(+), 140 deletions(-) create mode 100644 benchmarks/Makefile.am create mode 100644 benchmarks/benchmark_messages_proto2.proto create mode 100644 benchmarks/benchmark_messages_proto3.proto create mode 100644 benchmarks/benchmarks.proto create mode 100644 benchmarks/generate_datasets.cc delete mode 100644 benchmarks/google_speed.proto diff --git a/Makefile.am b/Makefile.am index a7a1f413..3e988816 100644 --- a/Makefile.am +++ b/Makefile.am @@ -9,7 +9,7 @@ AUTOMAKE_OPTIONS = foreign SUBDIRS = . src # Always include gmock in distributions. -DIST_SUBDIRS = $(subdirs) src conformance +DIST_SUBDIRS = $(subdirs) src conformance benchmarks # Build gmock before we build protobuf tests. 
We don't add gmock to SUBDIRS # because then "make check" would also build and run all of gmock's own tests, @@ -36,6 +36,10 @@ clean-local: echo "Making clean in conformance"; \ cd conformance && $(MAKE) $(AM_MAKEFLAGS) clean; \ fi; \ + if test -e benchmarks/Makefile; then \ + echo "Making clean in benchmarks"; \ + cd benchmarks && $(MAKE) $(AM_MAKEFLAGS) clean; \ + fi; \ if test -e objectivec/DevTools; then \ echo "Cleaning any ObjC pyc files"; \ rm -f objectivec/DevTools/*.pyc; \ diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am new file mode 100644 index 00000000..79581ee9 --- /dev/null +++ b/benchmarks/Makefile.am @@ -0,0 +1,75 @@ + +benchmarks_protoc_inputs = \ + benchmarks.proto \ + benchmark_messages_proto3.proto + +benchmarks_protoc_inputs_proto2 = \ + benchmark_messages_proto2.proto + +benchmarks_protoc_outputs = \ + benchmarks.pb.cc \ + benchmarks.pb.h \ + benchmark_messages_proto3.pb.cc \ + benchmark_messages_proto3.pb.h + +benchmarks_protoc_outputs_proto2 = \ + benchmark_messages_proto2.pb.cc \ + benchmark_messages_proto2.pb.h + +bin_PROGRAMS = generate-datasets + +generate_datasets_LDADD = $(top_srcdir)/src/libprotobuf.la +generate_datasets_SOURCES = generate_datasets.cc +generate_datasets_CPPFLAGS = -I$(top_srcdir)/src -I$(srcdir) +nodist_generate_datasets_SOURCES = \ + google_message1.h \ + google_message2.h \ + $(benchmarks_protoc_outputs) \ + $(benchmarks_protoc_outputs_proto2) + +# Explicit deps beacuse BUILT_SOURCES are only done before a "make all/check" +# so a direct "make test_cpp" could fail if parallel enough. 
+generate_datasets-generate_datasets.$(OBJEXT): benchmarks.pb.h google_message1.h google_message2.h + +$(benchmarks_protoc_outputs): protoc_middleman +$(benchmarks_protoc_outputs_proto2): protoc_middleman2 + +google_message1.h: google_message1.dat + xxd -i $< $@ + +google_message2.h: google_message2.dat + xxd -i $< $@ + +CLEANFILES = \ + $(benchmarks_protoc_outputs) \ + $(benchmarks_protoc_outputs_proto2) \ + google_message1.h \ + google_message2.h \ + protoc_middleman \ + protoc_middleman2 \ + dataset.* + +if USE_EXTERNAL_PROTOC + +protoc_middleman: $(benchmarks_protoc_inputs) + $(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. $(benchmarks_protoc_inputs) + touch protoc_middleman + +protoc_middleman2: $(benchmarks_protoc_inputs_proto2) + $(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. $(benchmarks_protoc_inputs_proto2) + touch protoc_middleman2 + +else + +# We have to cd to $(srcdir) before executing protoc because $(protoc_inputs) is +# relative to srcdir, which may not be the same as the current directory when +# building out-of-tree. +protoc_middleman: $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs) $(well_known_type_protoc_inputs) + oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd $(benchmarks_protoc_inputs) ) + touch protoc_middleman + +protoc_middleman2: $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs_proto2) $(well_known_type_protoc_inputs) + oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd $(benchmarks_protoc_inputs_proto2) ) + touch protoc_middleman2 + +endif diff --git a/benchmarks/benchmark_messages_proto2.proto b/benchmarks/benchmark_messages_proto2.proto new file mode 100644 index 00000000..c7103be5 --- /dev/null +++ b/benchmarks/benchmark_messages_proto2.proto @@ -0,0 +1,141 @@ +// Benchmark messages for proto2. 
+ +syntax = "proto2"; + +package benchmarks.p2; +option java_package = "com.google.protobuf.benchmarks"; + +// This is the default, but we specify it here explicitly. +option optimize_for = SPEED; + +message GoogleMessage1 { + required string field1 = 1; + optional string field9 = 9; + optional string field18 = 18; + optional bool field80 = 80 [default=false]; + optional bool field81 = 81 [default=true]; + required int32 field2 = 2; + required int32 field3 = 3; + optional int32 field280 = 280; + optional int32 field6 = 6 [default=0]; + optional int64 field22 = 22; + optional string field4 = 4; + repeated fixed64 field5 = 5; + optional bool field59 = 59 [default=false]; + optional string field7 = 7; + optional int32 field16 = 16; + optional int32 field130 = 130 [default=0]; + optional bool field12 = 12 [default=true]; + optional bool field17 = 17 [default=true]; + optional bool field13 = 13 [default=true]; + optional bool field14 = 14 [default=true]; + optional int32 field104 = 104 [default=0]; + optional int32 field100 = 100 [default=0]; + optional int32 field101 = 101 [default=0]; + optional string field102 = 102; + optional string field103 = 103; + optional int32 field29 = 29 [default=0]; + optional bool field30 = 30 [default=false]; + optional int32 field60 = 60 [default=-1]; + optional int32 field271 = 271 [default=-1]; + optional int32 field272 = 272 [default=-1]; + optional int32 field150 = 150; + optional int32 field23 = 23 [default=0]; + optional bool field24 = 24 [default=false]; + optional int32 field25 = 25 [default=0]; + optional GoogleMessage1SubMessage field15 = 15; + optional bool field78 = 78; + optional int32 field67 = 67 [default=0]; + optional int32 field68 = 68; + optional int32 field128 = 128 [default=0]; + optional string field129 = 129 [default="xxxxxxxxxxxxxxxxxxxxx"]; + optional int32 field131 = 131 [default=0]; +} + +message GoogleMessage1SubMessage { + optional int32 field1 = 1 [default=0]; + optional int32 field2 = 2 [default=0]; + 
optional int32 field3 = 3 [default=0]; + optional string field15 = 15; + optional bool field12 = 12 [default=true]; + optional int64 field13 = 13; + optional int64 field14 = 14; + optional int32 field16 = 16; + optional int32 field19 = 19 [default=2]; + optional bool field20 = 20 [default=true]; + optional bool field28 = 28 [default=true]; + optional fixed64 field21 = 21; + optional int32 field22 = 22; + optional bool field23 = 23 [ default=false ]; + optional bool field206 = 206 [default=false]; + optional fixed32 field203 = 203; + optional int32 field204 = 204; + optional string field205 = 205; + optional uint64 field207 = 207; + optional uint64 field300 = 300; +} + +message GoogleMessage2 { + optional string field1 = 1; + optional int64 field3 = 3; + optional int64 field4 = 4; + optional int64 field30 = 30; + optional bool field75 = 75 [default=false]; + optional string field6 = 6; + optional bytes field2 = 2; + optional int32 field21 = 21 [default=0]; + optional int32 field71 = 71; + optional float field25 = 25; + optional int32 field109 = 109 [default=0]; + optional int32 field210 = 210 [default=0]; + optional int32 field211 = 211 [default=0]; + optional int32 field212 = 212 [default=0]; + optional int32 field213 = 213 [default=0]; + optional int32 field216 = 216 [default=0]; + optional int32 field217 = 217 [default=0]; + optional int32 field218 = 218 [default=0]; + optional int32 field220 = 220 [default=0]; + optional int32 field221 = 221 [default=0]; + optional float field222 = 222 [default=0.0]; + optional int32 field63 = 63; + + repeated group Group1 = 10 { + required float field11 = 11; + optional float field26 = 26; + optional string field12 = 12; + optional string field13 = 13; + repeated string field14 = 14; + required uint64 field15 = 15; + optional int32 field5 = 5; + optional string field27 = 27; + optional int32 field28 = 28; + optional string field29 = 29; + optional string field16 = 16; + repeated string field22 = 22; + repeated int32 field73 = 
73; + optional int32 field20 = 20 [default=0]; + optional string field24 = 24; + optional GoogleMessage2GroupedMessage field31 = 31; + } + repeated string field128 = 128; + optional int64 field131 = 131; + repeated string field127 = 127; + optional int32 field129 = 129; + repeated int64 field130 = 130; + optional bool field205 = 205 [default=false]; + optional bool field206 = 206 [default=false]; +} + +message GoogleMessage2GroupedMessage { + optional float field1 = 1; + optional float field2 = 2; + optional float field3 = 3 [default=0.0]; + optional bool field4 = 4; + optional bool field5 = 5; + optional bool field6 = 6 [default=true]; + optional bool field7 = 7 [default=false]; + optional float field8 = 8; + optional bool field9 = 9; + optional float field10 = 10; + optional int64 field11 = 11; +} diff --git a/benchmarks/benchmark_messages_proto3.proto b/benchmarks/benchmark_messages_proto3.proto new file mode 100644 index 00000000..4ea39c22 --- /dev/null +++ b/benchmarks/benchmark_messages_proto3.proto @@ -0,0 +1,76 @@ +// Benchmark messages for proto3. + +syntax = "proto3"; + +package benchmarks.p3; +option java_package = "com.google.protobuf.benchmarks"; + +// This is the default, but we specify it here explicitly. 
+option optimize_for = SPEED; + +message GoogleMessage1 { + string field1 = 1; + string field9 = 9; + string field18 = 18; + bool field80 = 80; + bool field81 = 81; + int32 field2 = 2; + int32 field3 = 3; + int32 field280 = 280; + int32 field6 = 6; + int64 field22 = 22; + string field4 = 4; + repeated fixed64 field5 = 5; + bool field59 = 59; + string field7 = 7; + int32 field16 = 16; + int32 field130 = 130; + bool field12 = 12; + bool field17 = 17; + bool field13 = 13; + bool field14 = 14; + int32 field104 = 104; + int32 field100 = 100; + int32 field101 = 101; + string field102 = 102; + string field103 = 103; + int32 field29 = 29; + bool field30 = 30; + int32 field60 = 60; + int32 field271 = 271; + int32 field272 = 272; + int32 field150 = 150; + int32 field23 = 23; + bool field24 = 24; + int32 field25 = 25; + GoogleMessage1SubMessage field15 = 15; + bool field78 = 78; + int32 field67 = 67; + int32 field68 = 68; + int32 field128 = 128; + string field129 = 129; + int32 field131 = 131; +} + +message GoogleMessage1SubMessage { + int32 field1 = 1; + int32 field2 = 2; + int32 field3 = 3; + string field15 = 15; + bool field12 = 12; + int64 field13 = 13; + int64 field14 = 14; + int32 field16 = 16; + int32 field19 = 19; + bool field20 = 20; + bool field28 = 28; + fixed64 field21 = 21; + int32 field22 = 22; + bool field23 = 23; + bool field206 = 206; + fixed32 field203 = 203; + int32 field204 = 204; + string field205 = 205; + uint64 field207 = 207; + uint64 field300 = 300; +} diff --git a/benchmarks/benchmarks.proto b/benchmarks/benchmarks.proto new file mode 100644 index 00000000..a891eb9e --- /dev/null +++ b/benchmarks/benchmarks.proto @@ -0,0 +1,102 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. 
+// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +syntax = "proto3"; +package benchmarks; +option java_package = "com.google.protobuf.benchmarks"; + +message BenchmarkDataset { + // Name of the benchmark dataset. This should be unique across all datasets. + // Should only contain word characters: [a-zA-Z0-9_] + string name = 1; + + // Fully-qualified name of the protobuf message for this dataset. + // It will be one of the messages defined benchmark_messages.proto. 
+ // Implementations that do not support reflection can implement this with + // an explicit "if/else" chain that lists every possible message defined + // in this file. + string message_name = 2; + + // The payload(s) for this dataset. They should be parsed or serialized + // in sequence, in a loop, ie. + // + // while (!benchmarkDone) { // Benchmark runner decides when to exit. + // for (i = 0; i < benchmark.payload.length; i++) { + // parse(benchmark.payload[i]) + // } + // } + // + // This is intended to let datasets include a variety of data to provide + // potentially more realistic results than just parsing the same message + // over and over. A single message parsed repeatedly could yield unusually + // good branch prediction performance. + repeated bytes payload = 3; +} + +// A benchmark can write out metrics that we will then upload to our metrics +// database for tracking over time. +message Metric { + // A unique ID for these results. Used for de-duping. + string guid = 1; + + // The tags specify exactly what benchmark was run against the dataset. + // The specific benchmark suite can decide what these mean, but here are + // some common tags that have a predefined meaning: + // + // - "dataset": for tests that pertain to a specific dataset. + // + // For example: + // + // # Tests parsing from binary proto string using arenas. + // tags={ + // dataset: "testalltypes", + // op: "parse", + // format: "binaryproto", + // input: "string" + // arena: "true" + // } + // + // # Tests serializing to JSON string. + // tags={ + // dataset: "testalltypes", + // op: "serialize", + // format: "json", + // input: "string" + // } + map<string, string> labels = 2; + + // Unit of measurement for the metric: + // - a speed test might be "mb_per_second" or "ops_per_second" + // - a size test might be "kb". + string unit = 3; + + // Metric value. 
+ double value = 4; +} diff --git a/benchmarks/generate_datasets.cc b/benchmarks/generate_datasets.cc new file mode 100644 index 00000000..f6f30cd8 --- /dev/null +++ b/benchmarks/generate_datasets.cc @@ -0,0 +1,114 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +const char *file_prefix = "dataset."; +const char *file_suffix = ".pb"; + +#include <fstream> +#include <iostream> +#include "benchmarks.pb.h" +#include "google_message1.h" +#include "google_message2.h" + +using benchmarks::BenchmarkDataset; +using google::protobuf::Descriptor; +using google::protobuf::DescriptorPool; +using google::protobuf::Message; +using google::protobuf::MessageFactory; + +#define ARRAY_TO_STRING(arr) std::string(arr, arr + sizeof(arr)) + +std::set<std::string> names; + +void WriteFileWithPayloads(const std::string& name, + const std::string& message_name, + const std::vector<std::string>& payload) { + if (!names.insert(name).second) { + std::cerr << "Duplicate test name: " << name << "\n"; + abort(); + } + + // First verify that this message name exists in our set of benchmark messages + // and that these payloads are valid for the given message. + const Descriptor* d = + DescriptorPool::generated_pool()->FindMessageTypeByName(message_name); + + if (!d) { + std::cerr << "For dataset " << name << ", no such message: " + << message_name << "\n"; + abort(); + } + + Message* m = MessageFactory::generated_factory()->GetPrototype(d)->New(); + + for (size_t i = 0; i < payload.size(); i++) { + if (!m->ParseFromString(payload[i])) { + std::cerr << "For dataset " << name << ", payload[" << i << "] fails " + << "to parse\n"; + abort(); + } + } + + BenchmarkDataset dataset; + dataset.set_name(name); + dataset.set_message_name(message_name); + for (size_t i = 0; i < payload.size(); i++) { + dataset.add_payload()->assign(payload[i]); + } + + std::string serialized; + dataset.SerializeToString(&serialized); + + std::ofstream writer; + std::string fname = file_prefix + name + file_suffix; + writer.open(fname, std::ios::out | std::ios::binary); + writer << serialized; + writer.close(); + + std::cerr << "Wrote dataset: " << fname << "\n"; +} + +void WriteFile(const std::string& name, const std::string& message_name, + const std::string& payload) { + std::vector<std::string> payloads; + payloads.push_back(payload); + WriteFileWithPayloads(name, 
message_name, payloads); +} + +int main() { + WriteFile("google_message1_proto3", "benchmarks.p3.GoogleMessage1", + ARRAY_TO_STRING(google_message1_dat)); + WriteFile("google_message1_proto2", "benchmarks.p2.GoogleMessage1", + ARRAY_TO_STRING(google_message1_dat)); + + // Not in proto3 because it has a group, which is not supported. + WriteFile("google_message2", "benchmarks.p2.GoogleMessage2", + ARRAY_TO_STRING(google_message2_dat)); +} diff --git a/benchmarks/google_speed.proto b/benchmarks/google_speed.proto deleted file mode 100644 index 16f6d678..00000000 --- a/benchmarks/google_speed.proto +++ /dev/null @@ -1,138 +0,0 @@ -syntax = "proto2"; - -package benchmarks; - -option java_outer_classname = "GoogleSpeed"; -option optimize_for = SPEED; - -message SpeedMessage1 { - required string field1 = 1; - optional string field9 = 9; - optional string field18 = 18; - optional bool field80 = 80 [default=false]; - optional bool field81 = 81 [default=true]; - required int32 field2 = 2; - required int32 field3 = 3; - optional int32 field280 = 280; - optional int32 field6 = 6 [default=0]; - optional int64 field22 = 22; - optional string field4 = 4; - repeated fixed64 field5 = 5; - optional bool field59 = 59 [default=false]; - optional string field7 = 7; - optional int32 field16 = 16; - optional int32 field130 = 130 [default=0]; - optional bool field12 = 12 [default=true]; - optional bool field17 = 17 [default=true]; - optional bool field13 = 13 [default=true]; - optional bool field14 = 14 [default=true]; - optional int32 field104 = 104 [default=0]; - optional int32 field100 = 100 [default=0]; - optional int32 field101 = 101 [default=0]; - optional string field102 = 102; - optional string field103 = 103; - optional int32 field29 = 29 [default=0]; - optional bool field30 = 30 [default=false]; - optional int32 field60 = 60 [default=-1]; - optional int32 field271 = 271 [default=-1]; - optional int32 field272 = 272 [default=-1]; - optional int32 field150 = 150; - optional int32 
field23 = 23 [default=0]; - optional bool field24 = 24 [default=false]; - optional int32 field25 = 25 [default=0]; - optional SpeedMessage1SubMessage field15 = 15; - optional bool field78 = 78; - optional int32 field67 = 67 [default=0]; - optional int32 field68 = 68; - optional int32 field128 = 128 [default=0]; - optional string field129 = 129 [default="xxxxxxxxxxxxxxxxxxxxx"]; - optional int32 field131 = 131 [default=0]; -} - -message SpeedMessage1SubMessage { - optional int32 field1 = 1 [default=0]; - optional int32 field2 = 2 [default=0]; - optional int32 field3 = 3 [default=0]; - optional string field15 = 15; - optional bool field12 = 12 [default=true]; - optional int64 field13 = 13; - optional int64 field14 = 14; - optional int32 field16 = 16; - optional int32 field19 = 19 [default=2]; - optional bool field20 = 20 [default=true]; - optional bool field28 = 28 [default=true]; - optional fixed64 field21 = 21; - optional int32 field22 = 22; - optional bool field23 = 23 [ default=false ]; - optional bool field206 = 206 [default=false]; - optional fixed32 field203 = 203; - optional int32 field204 = 204; - optional string field205 = 205; - optional uint64 field207 = 207; - optional uint64 field300 = 300; -} - -message SpeedMessage2 { - optional string field1 = 1; - optional int64 field3 = 3; - optional int64 field4 = 4; - optional int64 field30 = 30; - optional bool field75 = 75 [default=false]; - optional string field6 = 6; - optional bytes field2 = 2; - optional int32 field21 = 21 [default=0]; - optional int32 field71 = 71; - optional float field25 = 25; - optional int32 field109 = 109 [default=0]; - optional int32 field210 = 210 [default=0]; - optional int32 field211 = 211 [default=0]; - optional int32 field212 = 212 [default=0]; - optional int32 field213 = 213 [default=0]; - optional int32 field216 = 216 [default=0]; - optional int32 field217 = 217 [default=0]; - optional int32 field218 = 218 [default=0]; - optional int32 field220 = 220 [default=0]; - optional 
int32 field221 = 221 [default=0]; - optional float field222 = 222 [default=0.0]; - optional int32 field63 = 63; - - repeated group Group1 = 10 { - required float field11 = 11; - optional float field26 = 26; - optional string field12 = 12; - optional string field13 = 13; - repeated string field14 = 14; - required uint64 field15 = 15; - optional int32 field5 = 5; - optional string field27 = 27; - optional int32 field28 = 28; - optional string field29 = 29; - optional string field16 = 16; - repeated string field22 = 22; - repeated int32 field73 = 73; - optional int32 field20 = 20 [default=0]; - optional string field24 = 24; - optional SpeedMessage2GroupedMessage field31 = 31; - } - repeated string field128 = 128; - optional int64 field131 = 131; - repeated string field127 = 127; - optional int32 field129 = 129; - repeated int64 field130 = 130; - optional bool field205 = 205 [default=false]; - optional bool field206 = 206 [default=false]; -} - -message SpeedMessage2GroupedMessage { - optional float field1 = 1; - optional float field2 = 2; - optional float field3 = 3 [default=0.0]; - optional bool field4 = 4; - optional bool field5 = 5; - optional bool field6 = 6 [default=true]; - optional bool field7 = 7 [default=false]; - optional float field8 = 8; - optional bool field9 = 9; - optional float field10 = 10; - optional int64 field11 = 11; -} diff --git a/configure.ac b/configure.ac index 33a6c64d..d56a7047 100644 --- a/configure.ac +++ b/configure.ac @@ -180,5 +180,5 @@ export CFLAGS export CXXFLAGS AC_CONFIG_SUBDIRS([gmock]) -AC_CONFIG_FILES([Makefile src/Makefile conformance/Makefile protobuf.pc protobuf-lite.pc]) +AC_CONFIG_FILES([Makefile src/Makefile benchmarks/Makefile conformance/Makefile protobuf.pc protobuf-lite.pc]) AC_OUTPUT -- cgit v1.2.3 From 30a2f70eb33a216c53c56f765f09aea63c0cf53b Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Wed, 27 Apr 2016 18:34:33 -0700 Subject: Added README describing the directory. 
--- benchmarks/README.md | 28 ++++++++++++++++++++++++++++ benchmarks/benchmarks.proto | 8 +++++--- 2 files changed, 33 insertions(+), 3 deletions(-) create mode 100644 benchmarks/README.md diff --git a/benchmarks/README.md b/benchmarks/README.md new file mode 100644 index 00000000..c9027805 --- /dev/null +++ b/benchmarks/README.md @@ -0,0 +1,28 @@ + +# Protocol Buffers Benchmarks + +This directory contains benchmarking schemas and data sets that you +can use to test a variety of performance scenarios against your +protobuf language runtime. + +The schema for the datasets is described in `benchmarks.proto`. + +Generate the data sets like so: + +``` +$ make +$ ./generate-datasets +Wrote dataset: dataset.google_message1_proto3.pb +Wrote dataset: dataset.google_message1_proto2.pb +Wrote dataset: dataset.google_message2.pb +$ +``` + +Each data set will be written to its own file. Benchmarks will +likely want to run several benchmarks against each data set (parse, +serialize, possibly JSON, possibly using different APIs, etc). + +We would like to add more data sets. In general we will favor data sets +that make the overall suite diverse without being too large or having +too many similar tests. Ideally everyone can run through the entire +suite without the test run getting too long. diff --git a/benchmarks/benchmarks.proto b/benchmarks/benchmarks.proto index a891eb9e..5c2706df 100644 --- a/benchmarks/benchmarks.proto +++ b/benchmarks/benchmarks.proto @@ -38,10 +38,12 @@ message BenchmarkDataset { string name = 1; // Fully-qualified name of the protobuf message for this dataset. - // It will be one of the messages defined benchmark_messages.proto. + // It will be one of the messages defined benchmark_messages_proto2.proto + // or benchmark_messages_proto3.proto. + // // Implementations that do not support reflection can implement this with - // an explicit "if/else" chain that lists every possible message defined - // in this file. 
+ // an explicit "if/else" chain that lists every known message defined + // in those files. string message_name = 2; // The payload(s) for this dataset. They should be parsed or serialized -- cgit v1.2.3 From 1ce5bd8e84085a3f89017d71442c75fd4a8dc9f6 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Fri, 29 Apr 2016 09:39:26 -0700 Subject: Updates for PR comments. --- benchmarks/Makefile.am | 3 ++- benchmarks/benchmarks.proto | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am index 79581ee9..f0ed4899 100644 --- a/benchmarks/Makefile.am +++ b/benchmarks/Makefile.am @@ -27,8 +27,9 @@ nodist_generate_datasets_SOURCES = \ $(benchmarks_protoc_outputs) \ $(benchmarks_protoc_outputs_proto2) -# Explicit deps beacuse BUILT_SOURCES are only done before a "make all/check" +# Explicit deps because BUILT_SOURCES are only done before a "make all/check" # so a direct "make test_cpp" could fail if parallel enough. +# See: https://www.gnu.org/software/automake/manual/html_node/Built-Sources-Example.html#Recording-Dependencies-manually generate_datasets-generate_datasets.$(OBJEXT): benchmarks.pb.h google_message1.h google_message2.h $(benchmarks_protoc_outputs): protoc_middleman diff --git a/benchmarks/benchmarks.proto b/benchmarks/benchmarks.proto index 5c2706df..0ac3bf33 100644 --- a/benchmarks/benchmarks.proto +++ b/benchmarks/benchmarks.proto @@ -68,16 +68,16 @@ message Metric { // A unique ID for these results. Used for de-duping. string guid = 1; - // The tags specify exactly what benchmark was run against the dataset. + // The labels specify exactly what benchmark was run against the dataset. // The specific benchmark suite can decide what these mean, but here are - // some common tags that have a predefined meaning: + // some common labels that have a predefined meaning: // // - "dataset": for tests that pertain to a specific dataset. 
// // For example: // // # Tests parsing from binary proto string using arenas. - // tags={ + // labels={ // dataset: "testalltypes", // op: "parse", // format: "binaryproto", @@ -86,7 +86,7 @@ message Metric { // } // // # Tests serializing to JSON string. - // tags={ + // labels={ // dataset: "testalltypes", // op: "serialize", // format: "json", -- cgit v1.2.3 From cb36bde04716436fc9560ac908ca4551bdc614fb Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Fri, 29 Apr 2016 09:52:20 -0700 Subject: Make the C++ tests build the benchmarking code. --- tests.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests.sh b/tests.sh index fd81b764..75bf9001 100755 --- a/tests.sh +++ b/tests.sh @@ -36,6 +36,9 @@ build_cpp() { internal_build_cpp make check -j2 cd conformance && make test_cpp && cd .. + + # Verify benchmarking code can build successfully. + cd benchmarks && make && cd .. } build_cpp_distcheck() { -- cgit v1.2.3 From 49a8918e9742d4bc9f577df9599061e342516b96 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Fri, 29 Apr 2016 10:19:03 -0700 Subject: Read files directly from filesystem since xxd isn't always available. --- benchmarks/Makefile.am | 12 +----------- benchmarks/generate_datasets.cc | 18 +++++++++++++----- tests.sh | 2 +- 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am index f0ed4899..1e162eb1 100644 --- a/benchmarks/Makefile.am +++ b/benchmarks/Makefile.am @@ -22,30 +22,20 @@ generate_datasets_LDADD = $(top_srcdir)/src/libprotobuf.la generate_datasets_SOURCES = generate_datasets.cc generate_datasets_CPPFLAGS = -I$(top_srcdir)/src -I$(srcdir) nodist_generate_datasets_SOURCES = \ - google_message1.h \ - google_message2.h \ $(benchmarks_protoc_outputs) \ $(benchmarks_protoc_outputs_proto2) # Explicit deps because BUILT_SOURCES are only done before a "make all/check" # so a direct "make test_cpp" could fail if parallel enough. 
# See: https://www.gnu.org/software/automake/manual/html_node/Built-Sources-Example.html#Recording-Dependencies-manually -generate_datasets-generate_datasets.$(OBJEXT): benchmarks.pb.h google_message1.h google_message2.h +generate_datasets-generate_datasets.$(OBJEXT): benchmarks.pb.h $(benchmarks_protoc_outputs): protoc_middleman $(benchmarks_protoc_outputs_proto2): protoc_middleman2 -google_message1.h: google_message1.dat - xxd -i $< $@ - -google_message2.h: google_message2.dat - xxd -i $< $@ - CLEANFILES = \ $(benchmarks_protoc_outputs) \ $(benchmarks_protoc_outputs_proto2) \ - google_message1.h \ - google_message2.h \ protoc_middleman \ protoc_middleman2 \ dataset.* diff --git a/benchmarks/generate_datasets.cc b/benchmarks/generate_datasets.cc index f6f30cd8..8e9b441c 100644 --- a/benchmarks/generate_datasets.cc +++ b/benchmarks/generate_datasets.cc @@ -34,8 +34,6 @@ const char *file_suffix = ".pb"; #include <fstream> #include <iostream> #include "benchmarks.pb.h" -#include "google_message1.h" -#include "google_message2.h" using benchmarks::BenchmarkDataset; using google::protobuf::Descriptor; @@ -102,13 +100,23 @@ void WriteFile(const std::string& name, const std::string& message_name, WriteFileWithPayloads(name, message_name, payloads); } +std::string ReadFile(const std::string& name) { + std::ifstream file(name); + GOOGLE_CHECK(file.is_open()) << "Couldn't find file '" << name << + "', please make sure you are running " + "this command from the benchmarks/ " + "directory.\n"; + return std::string((std::istreambuf_iterator<char>(file)), + std::istreambuf_iterator<char>()); +} + int main() { WriteFile("google_message1_proto3", "benchmarks.p3.GoogleMessage1", - ARRAY_TO_STRING(google_message1_dat)); + ReadFile("google_message1.dat")); WriteFile("google_message1_proto2", "benchmarks.p2.GoogleMessage1", - ARRAY_TO_STRING(google_message1_dat)); + ReadFile("google_message1.dat")); // Not in proto3 because it has a group, which is not supported.
WriteFile("google_message2", "benchmarks.p2.GoogleMessage2", - ARRAY_TO_STRING(google_message2_dat)); + ReadFile("google_message2.dat")); } diff --git a/tests.sh b/tests.sh index 75bf9001..6a9439a5 100755 --- a/tests.sh +++ b/tests.sh @@ -38,7 +38,7 @@ build_cpp() { cd conformance && make test_cpp && cd .. # Verify benchmarking code can build successfully. - cd benchmarks && make && cd .. + cd benchmarks && make && ./generate-datasets && cd .. } build_cpp_distcheck() { -- cgit v1.2.3 From b2d4b1a528a4b2e808ac2924e0552e4ea94c1d87 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Fri, 29 Apr 2016 10:22:56 -0700 Subject: Fixed for pre-C++11 ifstream which does not accept std::string. --- benchmarks/generate_datasets.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/generate_datasets.cc b/benchmarks/generate_datasets.cc index 8e9b441c..dab635e7 100644 --- a/benchmarks/generate_datasets.cc +++ b/benchmarks/generate_datasets.cc @@ -86,7 +86,7 @@ void WriteFileWithPayloads(const std::string& name, std::ofstream writer; std::string fname = file_prefix + name + file_suffix; - writer.open(fname); + writer.open(fname.c_str()); writer << serialized; writer.close(); @@ -101,7 +101,7 @@ void WriteFile(const std::string& name, const std::string& message_name, } std::string ReadFile(const std::string& name) { - std::ifstream file(name); + std::ifstream file(name.c_str()); GOOGLE_CHECK(file.is_open()) << "Couldn't find file '" << name << "', please make sure you are running " "this command from the benchmarks/ " -- cgit v1.2.3 From 247ef1f0df4ebb08a2bd8d47912a9e42b88abdc2 Mon Sep 17 00:00:00 2001 From: Josh Haberman Date: Tue, 3 May 2016 12:53:49 -0700 Subject: Addressed PR comments. 
--- benchmarks/benchmark_messages_proto2.proto | 2 +- benchmarks/benchmark_messages_proto3.proto | 2 +- benchmarks/benchmarks.proto | 41 ------------------------------ benchmarks/generate_datasets.cc | 19 +++++--------- 4 files changed, 9 insertions(+), 55 deletions(-) diff --git a/benchmarks/benchmark_messages_proto2.proto b/benchmarks/benchmark_messages_proto2.proto index c7103be5..01f67a1a 100644 --- a/benchmarks/benchmark_messages_proto2.proto +++ b/benchmarks/benchmark_messages_proto2.proto @@ -2,7 +2,7 @@ syntax = "proto2"; -package benchmarks.p2; +package benchmarks.proto2; option java_package = "com.google.protobuf.benchmarks"; // This is the default, but we specify it here explicitly. diff --git a/benchmarks/benchmark_messages_proto3.proto b/benchmarks/benchmark_messages_proto3.proto index 4ea39c22..32f58698 100644 --- a/benchmarks/benchmark_messages_proto3.proto +++ b/benchmarks/benchmark_messages_proto3.proto @@ -2,7 +2,7 @@ syntax = "proto3"; -package benchmarks.p3; +package benchmarks.proto3; option java_package = "com.google.protobuf.benchmarks"; // This is the default, but we specify it here explicitly. diff --git a/benchmarks/benchmarks.proto b/benchmarks/benchmarks.proto index 0ac3bf33..51c0b548 100644 --- a/benchmarks/benchmarks.proto +++ b/benchmarks/benchmarks.proto @@ -61,44 +61,3 @@ message BenchmarkDataset { // good branch prediction performance. repeated bytes payload = 3; } - -// A benchmark can write out metrics that we will then upload to our metrics -// database for tracking over time. -message Metric { - // A unique ID for these results. Used for de-duping. - string guid = 1; - - // The labels specify exactly what benchmark was run against the dataset. - // The specific benchmark suite can decide what these mean, but here are - // some common labels that have a predefined meaning: - // - // - "dataset": for tests that pertain to a specific dataset. - // - // For example: - // - // # Tests parsing from binary proto string using arenas. 
- // labels={ - // dataset: "testalltypes", - // op: "parse", - // format: "binaryproto", - // input: "string" - // arena: "true" - // } - // - // # Tests serializing to JSON string. - // labels={ - // dataset: "testalltypes", - // op: "serialize", - // format: "json", - // input: "string" - // } - map<string, string> labels = 2; - - // Unit of measurement for the metric: - // - a speed test might be "mb_per_second" or "ops_per_second" - // - a size test might be "kb". - string unit = 3; - - // Metric value. - double value = 4; -} diff --git a/benchmarks/generate_datasets.cc b/benchmarks/generate_datasets.cc index dab635e7..61e7adf1 100644 --- a/benchmarks/generate_datasets.cc +++ b/benchmarks/generate_datasets.cc @@ -28,9 +28,6 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -const char *file_prefix = "dataset."; -const char *file_suffix = ".pb"; - #include <fstream> #include <iostream> #include "benchmarks.pb.h" @@ -41,10 +38,11 @@ using google::protobuf::DescriptorPool; using google::protobuf::Message; using google::protobuf::MessageFactory; -#define ARRAY_TO_STRING(arr) std::string(arr, arr + sizeof(arr)) - std::set<std::string> names; +const char *file_prefix = "dataset."; +const char *file_suffix = ".pb"; + void WriteFileWithPayloads(const std::string& name, const std::string& message_name, const std::vector<std::string>& payload) { @@ -81,13 +79,10 @@ void WriteFileWithPayloads(const std::string& name, dataset.add_payload()->assign(payload[i]); } - std::string serialized; - dataset.SerializeToString(&serialized); - std::ofstream writer; std::string fname = file_prefix + name + file_suffix; writer.open(fname.c_str()); - writer << serialized; + dataset.SerializeToOstream(&writer); writer.close(); std::cerr << "Wrote dataset: " << fname << "\n"; @@ -111,12 +106,12 @@ std::string ReadFile(const std::string& name) { } int main() { - WriteFile("google_message1_proto3", "benchmarks.p3.GoogleMessage1", +
WriteFile("google_message1_proto3", "benchmarks.proto3.GoogleMessage1", ReadFile("google_message1.dat")); - WriteFile("google_message1_proto2", "benchmarks.p2.GoogleMessage1", + WriteFile("google_message1_proto2", "benchmarks.proto2.GoogleMessage1", ReadFile("google_message1.dat")); // Not in proto3 because it has a group, which is not supported. - WriteFile("google_message2", "benchmarks.p2.GoogleMessage2", + WriteFile("google_message2", "benchmarks.proto2.GoogleMessage2", ReadFile("google_message2.dat")); } -- cgit v1.2.3