aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoshua Haberman <jhaberman@gmail.com>2016-05-03 13:11:20 -0700
committerJoshua Haberman <jhaberman@gmail.com>2016-05-03 13:11:20 -0700
commit07bcf21a9cbe600f45cba2fe38710a4ad5f7c5bc (patch)
treeb7f4a49aa5694c4de6dbd0ca88f8b15cfd90b91d
parent7dda312224b298fb857ddd791bed148d46aac94e (diff)
parent247ef1f0df4ebb08a2bd8d47912a9e42b88abdc2 (diff)
downloadprotobuf-07bcf21a9cbe600f45cba2fe38710a4ad5f7c5bc.tar.gz
protobuf-07bcf21a9cbe600f45cba2fe38710a4ad5f7c5bc.tar.bz2
protobuf-07bcf21a9cbe600f45cba2fe38710a4ad5f7c5bc.zip
Merge pull request #1464 from google/benchmarks
Added framework for generating/consuming benchmarking data sets.
-rw-r--r--Makefile.am6
-rw-r--r--benchmarks/Makefile.am66
-rw-r--r--benchmarks/README.md28
-rw-r--r--benchmarks/benchmark_messages_proto2.proto (renamed from benchmarks/google_speed.proto)19
-rw-r--r--benchmarks/benchmark_messages_proto3.proto76
-rw-r--r--benchmarks/benchmarks.proto63
-rw-r--r--benchmarks/generate_datasets.cc117
-rw-r--r--configure.ac2
-rwxr-xr-xtests.sh3
9 files changed, 370 insertions, 10 deletions
diff --git a/Makefile.am b/Makefile.am
index a7a1f413..3e988816 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -9,7 +9,7 @@ AUTOMAKE_OPTIONS = foreign
SUBDIRS = . src
# Always include gmock in distributions.
-DIST_SUBDIRS = $(subdirs) src conformance
+DIST_SUBDIRS = $(subdirs) src conformance benchmarks
# Build gmock before we build protobuf tests. We don't add gmock to SUBDIRS
# because then "make check" would also build and run all of gmock's own tests,
@@ -36,6 +36,10 @@ clean-local:
echo "Making clean in conformance"; \
cd conformance && $(MAKE) $(AM_MAKEFLAGS) clean; \
fi; \
+ if test -e benchmarks/Makefile; then \
+ echo "Making clean in benchmarks"; \
+ cd benchmarks && $(MAKE) $(AM_MAKEFLAGS) clean; \
+ fi; \
if test -e objectivec/DevTools; then \
echo "Cleaning any ObjC pyc files"; \
rm -f objectivec/DevTools/*.pyc; \
diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am
new file mode 100644
index 00000000..1e162eb1
--- /dev/null
+++ b/benchmarks/Makefile.am
@@ -0,0 +1,66 @@
+
+# .proto inputs compiled by the protoc_middleman rule: the dataset schema and
+# the proto3 benchmark messages.
+benchmarks_protoc_inputs = \
+ benchmarks.proto \
+ benchmark_messages_proto3.proto
+
+# .proto inputs compiled separately by the protoc_middleman2 rule.
+benchmarks_protoc_inputs_proto2 = \
+ benchmark_messages_proto2.proto
+
+# C++ files protoc generates from $(benchmarks_protoc_inputs).
+benchmarks_protoc_outputs = \
+ benchmarks.pb.cc \
+ benchmarks.pb.h \
+ benchmark_messages_proto3.pb.cc \
+ benchmark_messages_proto3.pb.h
+
+# C++ files protoc generates from $(benchmarks_protoc_inputs_proto2).
+benchmarks_protoc_outputs_proto2 = \
+ benchmark_messages_proto2.pb.cc \
+ benchmark_messages_proto2.pb.h
+
+# Tool that writes the dataset.*.pb files (see generate_datasets.cc).
+bin_PROGRAMS = generate-datasets
+
+# libprotobuf.la is a build product, so it must be referenced through
+# $(top_builddir); $(top_srcdir) only resolves to the same place for in-tree
+# builds.
+generate_datasets_LDADD = $(top_builddir)/src/libprotobuf.la
+generate_datasets_SOURCES = generate_datasets.cc
+generate_datasets_CPPFLAGS = -I$(top_srcdir)/src -I$(srcdir)
+# The protoc-generated sources are compiled into the tool but, being nodist_,
+# are not shipped in the distribution tarball.
+nodist_generate_datasets_SOURCES = \
+ $(benchmarks_protoc_outputs) \
+ $(benchmarks_protoc_outputs_proto2)
+
+# Explicit dependency: generate_datasets.cc includes benchmarks.pb.h, which is
+# generated by protoc. Without this rule a sufficiently parallel build could
+# try to compile the object before the header exists.
+# See: https://www.gnu.org/software/automake/manual/html_node/Built-Sources-Example.html#Recording-Dependencies-manually
+generate_datasets-generate_datasets.$(OBJEXT): benchmarks.pb.h
+
+# Every generated file depends on the stamp file of the rule that runs protoc,
+# so a single protoc invocation produces each group of outputs.
+$(benchmarks_protoc_outputs): protoc_middleman
+$(benchmarks_protoc_outputs_proto2): protoc_middleman2
+
+# Removed by "make clean": the protoc-generated C++ files, the two protoc
+# stamp files, and the dataset.* files written by generate-datasets.
+CLEANFILES = \
+ $(benchmarks_protoc_outputs) \
+ $(benchmarks_protoc_outputs_proto2) \
+ protoc_middleman \
+ protoc_middleman2 \
+ dataset.*
+
+if USE_EXTERNAL_PROTOC
+
+# Use the protoc named by $(PROTOC). Each rule generates C++ into the current
+# (build) directory and then touches its own stamp file ($@).
+protoc_middleman: $(benchmarks_protoc_inputs)
+	$(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. $(benchmarks_protoc_inputs)
+	touch $@
+
+protoc_middleman2: $(benchmarks_protoc_inputs_proto2)
+	$(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. $(benchmarks_protoc_inputs_proto2)
+	touch $@
+
+else
+
+# Use the protoc built in this tree.
+#
+# We have to cd to $(srcdir) before executing protoc because
+# $(benchmarks_protoc_inputs) is relative to srcdir, which may not be the same
+# as the current directory when building out-of-tree.
+#
+# protoc is a build product, so the prerequisite is named through
+# $(top_builddir), matching the $$oldpwd/../src/protoc path the recipe runs.
+#
+# NOTE(review): $(well_known_type_protoc_inputs) is not defined in this
+# Makefile.am, so it presumably expands to nothing here - confirm.
+protoc_middleman: $(top_builddir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs) $(well_known_type_protoc_inputs)
+	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd $(benchmarks_protoc_inputs) )
+	touch $@
+
+# Each rule must touch its own stamp ($@); touching protoc_middleman here would
+# leave protoc_middleman2 missing and force protoc to rerun on every build.
+protoc_middleman2: $(top_builddir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs_proto2) $(well_known_type_protoc_inputs)
+	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd $(benchmarks_protoc_inputs_proto2) )
+	touch $@
+
+endif
diff --git a/benchmarks/README.md b/benchmarks/README.md
new file mode 100644
index 00000000..c9027805
--- /dev/null
+++ b/benchmarks/README.md
@@ -0,0 +1,28 @@
+
+# Protocol Buffers Benchmarks
+
+This directory contains benchmarking schemas and data sets that you
+can use to test a variety of performance scenarios against your
+protobuf language runtime.
+
+The schema for the datasets is described in `benchmarks.proto`.
+
+Generate the data sets like so:
+
+```
+$ make
+$ ./generate-datasets
+Wrote dataset: dataset.google_message1_proto3.pb
+Wrote dataset: dataset.google_message1_proto2.pb
+Wrote dataset: dataset.google_message2.pb
+$
+```
+
+Each data set will be written to its own file. Benchmark runners will
+likely want to run several benchmarks against each data set (parse,
+serialize, possibly JSON, possibly using different APIs, etc).
+
+We would like to add more data sets. In general we will favor data sets
+that make the overall suite diverse without being too large or having
+too many similar tests. Ideally everyone can run through the entire
+suite without the test run getting too long.
diff --git a/benchmarks/google_speed.proto b/benchmarks/benchmark_messages_proto2.proto
index 16f6d678..01f67a1a 100644
--- a/benchmarks/google_speed.proto
+++ b/benchmarks/benchmark_messages_proto2.proto
@@ -1,11 +1,14 @@
+// Benchmark messages for proto2.
+
syntax = "proto2";
-package benchmarks;
+package benchmarks.proto2;
+option java_package = "com.google.protobuf.benchmarks";
-option java_outer_classname = "GoogleSpeed";
+// This is the default, but we specify it here explicitly.
option optimize_for = SPEED;
-message SpeedMessage1 {
+message GoogleMessage1 {
required string field1 = 1;
optional string field9 = 9;
optional string field18 = 18;
@@ -40,7 +43,7 @@ message SpeedMessage1 {
optional int32 field23 = 23 [default=0];
optional bool field24 = 24 [default=false];
optional int32 field25 = 25 [default=0];
- optional SpeedMessage1SubMessage field15 = 15;
+ optional GoogleMessage1SubMessage field15 = 15;
optional bool field78 = 78;
optional int32 field67 = 67 [default=0];
optional int32 field68 = 68;
@@ -49,7 +52,7 @@ message SpeedMessage1 {
optional int32 field131 = 131 [default=0];
}
-message SpeedMessage1SubMessage {
+message GoogleMessage1SubMessage {
optional int32 field1 = 1 [default=0];
optional int32 field2 = 2 [default=0];
optional int32 field3 = 3 [default=0];
@@ -72,7 +75,7 @@ message SpeedMessage1SubMessage {
optional uint64 field300 = 300;
}
-message SpeedMessage2 {
+message GoogleMessage2 {
optional string field1 = 1;
optional int64 field3 = 3;
optional int64 field4 = 4;
@@ -112,7 +115,7 @@ message SpeedMessage2 {
repeated int32 field73 = 73;
optional int32 field20 = 20 [default=0];
optional string field24 = 24;
- optional SpeedMessage2GroupedMessage field31 = 31;
+ optional GoogleMessage2GroupedMessage field31 = 31;
}
repeated string field128 = 128;
optional int64 field131 = 131;
@@ -123,7 +126,7 @@ message SpeedMessage2 {
optional bool field206 = 206 [default=false];
}
-message SpeedMessage2GroupedMessage {
+message GoogleMessage2GroupedMessage {
optional float field1 = 1;
optional float field2 = 2;
optional float field3 = 3 [default=0.0];
diff --git a/benchmarks/benchmark_messages_proto3.proto b/benchmarks/benchmark_messages_proto3.proto
new file mode 100644
index 00000000..32f58698
--- /dev/null
+++ b/benchmarks/benchmark_messages_proto3.proto
@@ -0,0 +1,76 @@
+// Benchmark messages for proto3.
+
+syntax = "proto3";
+
+package benchmarks.proto3;
+option java_package = "com.google.protobuf.benchmarks";
+
+// This is the default, but we specify it here explicitly.
+option optimize_for = SPEED;
+
+// proto3 version of the GoogleMessage1 benchmark message.
+//
+// generate_datasets.cc builds the "google_message1_proto3" dataset from this
+// type, using the same google_message1.dat payload that it uses for
+// benchmarks.proto2.GoogleMessage1.
+message GoogleMessage1 {
+ string field1 = 1;
+ string field9 = 9;
+ string field18 = 18;
+ bool field80 = 80;
+ bool field81 = 81;
+ int32 field2 = 2;
+ int32 field3 = 3;
+ int32 field280 = 280;
+ int32 field6 = 6;
+ int64 field22 = 22;
+ string field4 = 4;
+ repeated fixed64 field5 = 5;
+ bool field59 = 59;
+ string field7 = 7;
+ int32 field16 = 16;
+ int32 field130 = 130;
+ bool field12 = 12;
+ bool field17 = 17;
+ bool field13 = 13;
+ bool field14 = 14;
+ int32 field104 = 104;
+ int32 field100 = 100;
+ int32 field101 = 101;
+ string field102 = 102;
+ string field103 = 103;
+ int32 field29 = 29;
+ bool field30 = 30;
+ int32 field60 = 60;
+ int32 field271 = 271;
+ int32 field272 = 272;
+ int32 field150 = 150;
+ int32 field23 = 23;
+ bool field24 = 24;
+ int32 field25 = 25;
+ // The only message-typed field; its type is declared right after this message.
+ GoogleMessage1SubMessage field15 = 15;
+ bool field78 = 78;
+ int32 field67 = 67;
+ int32 field68 = 68;
+ int32 field128 = 128;
+ string field129 = 129;
+ int32 field131 = 131;
+}
+
+// Sub-message type used by GoogleMessage1.field15.
+message GoogleMessage1SubMessage {
+ int32 field1 = 1;
+ int32 field2 = 2;
+ int32 field3 = 3;
+ string field15 = 15;
+ bool field12 = 12;
+ int64 field13 = 13;
+ int64 field14 = 14;
+ int32 field16 = 16;
+ int32 field19 = 19;
+ bool field20 = 20;
+ bool field28 = 28;
+ fixed64 field21 = 21;
+ int32 field22 = 22;
+ bool field23 = 23;
+ bool field206 = 206;
+ fixed32 field203 = 203;
+ int32 field204 = 204;
+ string field205 = 205;
+ uint64 field207 = 207;
+ uint64 field300 = 300;
+}
diff --git a/benchmarks/benchmarks.proto b/benchmarks/benchmarks.proto
new file mode 100644
index 00000000..51c0b548
--- /dev/null
+++ b/benchmarks/benchmarks.proto
@@ -0,0 +1,63 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+syntax = "proto3";
+package benchmarks;
+option java_package = "com.google.protobuf.benchmarks";
+
+// A single benchmark dataset: a name, the message type it exercises, and the
+// serialized payloads to run against that type. generate_datasets.cc writes
+// one serialized BenchmarkDataset per output file.
+message BenchmarkDataset {
+ // Name of the benchmark dataset. This should be unique across all datasets.
+ // Should only contain word characters: [a-zA-Z0-9_]
+ string name = 1;
+
+ // Fully-qualified name of the protobuf message for this dataset.
+ // It will be one of the messages defined in benchmark_messages_proto2.proto
+ // or benchmark_messages_proto3.proto.
+ //
+ // Implementations that do not support reflection can implement this with
+ // an explicit "if/else" chain that lists every known message defined
+ // in those files.
+ string message_name = 2;
+
+ // The payload(s) for this dataset. They should be parsed or serialized
+ // in sequence, in a loop, i.e.
+ //
+ // while (!benchmarkDone) { // Benchmark runner decides when to exit.
+ // for (i = 0; i < benchmark.payload.length; i++) {
+ // parse(benchmark.payload[i])
+ // }
+ // }
+ //
+ // This is intended to let datasets include a variety of data to provide
+ // potentially more realistic results than just parsing the same message
+ // over and over. A single message parsed repeatedly could yield unusually
+ // good branch prediction performance.
+ repeated bytes payload = 3;
+}
diff --git a/benchmarks/generate_datasets.cc b/benchmarks/generate_datasets.cc
new file mode 100644
index 00000000..61e7adf1
--- /dev/null
+++ b/benchmarks/generate_datasets.cc
@@ -0,0 +1,117 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <set>
+#include <string>
+#include <vector>
+#include "benchmarks.pb.h"
+
+using benchmarks::BenchmarkDataset;
+using google::protobuf::Descriptor;
+using google::protobuf::DescriptorPool;
+using google::protobuf::Message;
+using google::protobuf::MessageFactory;
+
+// Dataset names written so far; used to reject duplicate dataset names.
+std::set<std::string> names;
+
+// Each dataset is written to the file file_prefix + <name> + file_suffix.
+const char *file_prefix = "dataset.";
+const char *file_suffix = ".pb";
+
+// Validates a dataset and writes it, as a serialized BenchmarkDataset, to the
+// file file_prefix + name + file_suffix in the current directory.
+//
+//   name:         dataset name; using the same name twice aborts.
+//   message_name: fully-qualified message type; it must exist in the
+//                 generated descriptor pool.
+//   payload:      serialized messages; each must parse as message_name.
+//
+// Every validation or I/O failure is reported on stderr and aborts the
+// program, so "Wrote dataset" is only printed for a file that was written.
+void WriteFileWithPayloads(const std::string& name,
+                           const std::string& message_name,
+                           const std::vector<std::string>& payload) {
+  if (!names.insert(name).second) {
+    std::cerr << "Duplicate test name: " << name << "\n";
+    abort();
+  }
+
+  // First verify that this message name exists in our set of benchmark
+  // messages and that these payloads are valid for the given message.
+  const Descriptor* d =
+      DescriptorPool::generated_pool()->FindMessageTypeByName(message_name);
+
+  if (!d) {
+    std::cerr << "For dataset " << name << ", no such message: "
+              << message_name << "\n";
+    abort();
+  }
+
+  // Scratch instance used only for validation; New() transfers ownership to
+  // us, so it is deleted once every payload has been checked.
+  Message* m = MessageFactory::generated_factory()->GetPrototype(d)->New();
+
+  for (size_t i = 0; i < payload.size(); i++) {
+    if (!m->ParseFromString(payload[i])) {
+      std::cerr << "For dataset " << name << ", payload[" << i << "] fails "
+                << "to parse\n";
+      abort();
+    }
+  }
+  delete m;
+
+  BenchmarkDataset dataset;
+  dataset.set_name(name);
+  dataset.set_message_name(message_name);
+  for (size_t i = 0; i < payload.size(); i++) {
+    dataset.add_payload()->assign(payload[i]);
+  }
+
+  // Serialized protobufs are binary data: open in binary mode so platforms
+  // that translate line endings in text mode cannot corrupt the output.
+  std::string fname = file_prefix + name + file_suffix;
+  std::ofstream writer(fname.c_str(), std::ios::out | std::ios::binary);
+  if (!writer.is_open()) {
+    std::cerr << "For dataset " << name << ", couldn't open " << fname
+              << " for writing\n";
+    abort();
+  }
+
+  const bool serialized = dataset.SerializeToOstream(&writer);
+  writer.close();
+  if (!serialized || writer.fail()) {
+    std::cerr << "For dataset " << name << ", failed to write " << fname
+              << "\n";
+    abort();
+  }
+
+  std::cerr << "Wrote dataset: " << fname << "\n";
+}
+
+// Convenience wrapper for a dataset that consists of exactly one payload.
+void WriteFile(const std::string& name, const std::string& message_name,
+               const std::string& payload) {
+  const std::vector<std::string> single_payload(1, payload);
+  WriteFileWithPayloads(name, message_name, single_payload);
+}
+
+// Returns the entire contents of the file `name`.
+// GOOGLE_CHECK-fails if the file cannot be opened.
+std::string ReadFile(const std::string& name) {
+  // The inputs are serialized protobufs, so read them in binary mode; text
+  // mode may translate or truncate bytes on some platforms.
+  std::ifstream file(name.c_str(), std::ios::in | std::ios::binary);
+  GOOGLE_CHECK(file.is_open()) << "Couldn't find file '" << name <<
+                                  "', please make sure you are running "
+                                  "this command from the benchmarks/ "
+                                  "directory.\n";
+  return std::string((std::istreambuf_iterator<char>(file)),
+                     std::istreambuf_iterator<char>());
+}
+
+// Writes one dataset file per benchmark message, reading each payload from a
+// .dat file in the current directory.
+int main() {
+  // The same payload file backs both the proto3 and the proto2 dataset; it is
+  // read once for each of them.
+  const std::string message1_for_proto3 = ReadFile("google_message1.dat");
+  WriteFile("google_message1_proto3", "benchmarks.proto3.GoogleMessage1",
+            message1_for_proto3);
+
+  const std::string message1_for_proto2 = ReadFile("google_message1.dat");
+  WriteFile("google_message1_proto2", "benchmarks.proto2.GoogleMessage1",
+            message1_for_proto2);
+
+  // Not in proto3 because it has a group, which is not supported.
+  const std::string message2 = ReadFile("google_message2.dat");
+  WriteFile("google_message2", "benchmarks.proto2.GoogleMessage2", message2);
+
+  return 0;
+}
diff --git a/configure.ac b/configure.ac
index 33a6c64d..d56a7047 100644
--- a/configure.ac
+++ b/configure.ac
@@ -180,5 +180,5 @@ export CFLAGS
export CXXFLAGS
AC_CONFIG_SUBDIRS([gmock])
-AC_CONFIG_FILES([Makefile src/Makefile conformance/Makefile protobuf.pc protobuf-lite.pc])
+AC_CONFIG_FILES([Makefile src/Makefile benchmarks/Makefile conformance/Makefile protobuf.pc protobuf-lite.pc])
AC_OUTPUT
diff --git a/tests.sh b/tests.sh
index fd81b764..6a9439a5 100755
--- a/tests.sh
+++ b/tests.sh
@@ -36,6 +36,9 @@ build_cpp() {
internal_build_cpp
make check -j2
cd conformance && make test_cpp && cd ..
+
+ # Verify benchmarking code can build successfully.
+ cd benchmarks && make && ./generate-datasets && cd ..
}
build_cpp_distcheck() {