From 2fc69b1561e1feaa1cbc56d1617f949d13352b97 Mon Sep 17 00:00:00 2001
From: Yilun Chong
Date: Fri, 5 Jan 2018 11:20:40 -0800
Subject: Add python benchmark

---
 benchmarks/Makefile.am                  |  79 +++++++++++++++++++++-
 benchmarks/README.md                    |  69 +++++++++++++++++--
 benchmarks/__init__.py                  |   0
 benchmarks/py_benchmark.py              | 115 ++++++++++++++++++++++++++++++++
 benchmarks/python_benchmark_messages.cc |  29 ++++++++
 5 files changed, 284 insertions(+), 8 deletions(-)
 create mode 100644 benchmarks/__init__.py
 create mode 100755 benchmarks/py_benchmark.py
 create mode 100644 benchmarks/python_benchmark_messages.cc

diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am
index 582e9934..08eae398 100755
--- a/benchmarks/Makefile.am
+++ b/benchmarks/Makefile.am
@@ -39,11 +39,11 @@ else
 # relative to srcdir, which may not be the same as the current directory when
 # building out-of-tree.
 protoc_middleman: make_tmp_dir $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs) $(well_known_type_protoc_inputs)
-	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd --java_out=$$oldpwd/tmp/java/src/main/java $(benchmarks_protoc_inputs) )
+	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd --java_out=$$oldpwd/tmp/java/src/main/java --python_out=$$oldpwd/tmp $(benchmarks_protoc_inputs) )
 	touch protoc_middleman
 
 protoc_middleman2: make_tmp_dir $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs_proto2) $(well_known_type_protoc_inputs)
-	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd --java_out=$$oldpwd/tmp/java/src/main/java $(benchmarks_protoc_inputs_proto2) )
+	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd --java_out=$$oldpwd/tmp/java/src/main/java --python_out=$$oldpwd/tmp $(benchmarks_protoc_inputs_proto2) )
 	touch protoc_middleman2
 
 endif
@@ -155,6 +155,75 @@ java: protoc_middleman protoc_middleman2 java-benchmark
 
 ############# JAVA RULES END ##############
 
+
+############# PYTHON RULES ##############
+
+python_add_init: protoc_middleman protoc_middleman2
+	all_file=`find tmp -type f -regex '.*\.py'` && \
+	for file in $${all_file[@]}; do \
+		path="$${file%/*}"; \
+		while true; do \
+			touch "$$path/__init__.py" && chmod +x "$$path/__init__.py"; \
+			if [[ $$path != *"/"* ]]; then break; fi; \
+			path=$${path%/*}; \
+		done \
+	done
+
+python_cpp_pkg_flags = `pkg-config --cflags --libs python`
+
+lib_LTLIBRARIES = libbenchmark_messages.la
+libbenchmark_messages_la_SOURCES = python_benchmark_messages.cc
+libbenchmark_messages_la_LIBADD = $(top_srcdir)/src/.libs/libprotobuf.la
+libbenchmark_messages_la_LDFLAGS = -version-info 1:0:0 -export-dynamic
+libbenchmark_messages_la_CPPFLAGS = -I$(top_srcdir)/src -I$(srcdir) $(python_cpp_pkg_flags)
+libbenchmark_messages_la-libbenchmark_messages_la.$(OBJEXT): $(benchmarks_protoc_outputs_header) $(benchmarks_protoc_outputs_proto2_header) $(benchmarks_protoc_outputs) $(benchmarks_protoc_outputs_proto2)
+nodist_libbenchmark_messages_la_SOURCES = \
+	$(benchmarks_protoc_outputs) \
+	$(benchmarks_protoc_outputs_proto2) \
+	$(benchmarks_protoc_outputs_proto2_header) \
+	$(benchmarks_protoc_outputs_header)
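+
+# The *-benchmark targets below each write a small shell script that points
+# LD_LIBRARY_PATH/DYLD_LIBRARY_PATH at libprotobuf, selects the protobuf
+# Python implementation via PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION, and then
+# runs py_benchmark.py on the datasets passed as arguments.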
+
+python-pure-python-benchmark: python_add_init
+	@echo "Writing shortcut script python-pure-python-benchmark..."
+	@echo '#! /bin/sh' > python-pure-python-benchmark
+	@echo export LD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-pure-python-benchmark
+	@echo export DYLD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-pure-python-benchmark
+	@echo export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=\'python\' >> python-pure-python-benchmark
+	@echo cp py_benchmark.py tmp >> python-pure-python-benchmark
+	@echo python tmp/py_benchmark.py false '$$@' >> python-pure-python-benchmark
+	@chmod +x python-pure-python-benchmark
+
+python-cpp-reflection-benchmark: python_add_init
+	@echo "Writing shortcut script python-cpp-reflection-benchmark..."
+	@echo '#! /bin/sh' > python-cpp-reflection-benchmark
+	@echo export LD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-cpp-reflection-benchmark
+	@echo export DYLD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-cpp-reflection-benchmark
+	@echo export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=\'cpp\' >> python-cpp-reflection-benchmark
+	@echo cp py_benchmark.py tmp >> python-cpp-reflection-benchmark
+	@echo python tmp/py_benchmark.py false '$$@' >> python-cpp-reflection-benchmark
+	@chmod +x python-cpp-reflection-benchmark
+
+python-cpp-generated-code-benchmark: python_add_init libbenchmark_messages.la
+	@echo "Writing shortcut script python-cpp-generated-code-benchmark..."
+	@echo '#! /bin/sh' > python-cpp-generated-code-benchmark
+	@echo export LD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-cpp-generated-code-benchmark
+	@echo export DYLD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-cpp-generated-code-benchmark
+	@echo export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=\'cpp\' >> python-cpp-generated-code-benchmark
+	@echo cp py_benchmark.py tmp >> python-cpp-generated-code-benchmark
+	@echo python tmp/py_benchmark.py true '$$@' >> python-cpp-generated-code-benchmark
+	@chmod +x python-cpp-generated-code-benchmark
+
+python-pure-python: python-pure-python-benchmark
+	./python-pure-python-benchmark $(all_data)
+
+python-cpp-reflection: python-cpp-reflection-benchmark
+	./python-cpp-reflection-benchmark $(all_data)
+
+python-cpp-generated-code: python-cpp-generated-code-benchmark
+	./python-cpp-generated-code-benchmark $(all_data)
+
+############# PYTHON RULES END ##############
+
 MAINTAINERCLEANFILES = \
 	Makefile.in
 
@@ -168,7 +237,11 @@ CLEANFILES = \
 	protoc_middleman \
 	protoc_middleman2 \
 	javac_middleman \
-	java-benchmark
+	java-benchmark \
+	python_cpp_proto_library \
+	python-pure-python-benchmark \
+	python-cpp-reflection-benchmark \
+	python-cpp-generated-code-benchmark
 
 clean-local:
 	-rm -rf tmp/*

diff --git a/benchmarks/README.md b/benchmarks/README.md
index 09c06907..71104d69 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -17,12 +17,25 @@
 We are using [google/benchmark](https://github.com/google/benchmark) as the
 benchmark tool for testing cpp. This will be automaticly made during build the
 cpp benchmark.
 
-### JAVA
+### Java
 We're using maven to build the java benchmarks, which is the same as to build
 the Java protobuf. There're no other tools need to install. We're using
 [google/caliper](https://github.com/google/caliper) as benchmark tool, which
 can be automaticly included by maven.
 
+### Python
+We're using the Python C++ API to test the cpp generated code version of
+Python protobuf; it is also a prerequisite for the Python protobuf cpp
+implementation. You need to install the matching version of the Python C++
+extension package before running the cpp generated code version of the Python
+protobuf benchmark. For example, under Ubuntu, you need to run:
+
+```
+$ sudo apt-get install python-dev
+$ sudo apt-get install python3-dev
+```
+You also need to make sure `pkg-config` is installed.
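+
+As a quick sanity check, you can confirm that `pkg-config` can locate the
+Python headers; this is the same query the benchmark Makefile runs to build
+the C++ extension:
+
+```
+$ pkg-config --cflags --libs python
+```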
+
 ### Big data
 
 There's some optional big testing data which is not included in the directory
 initially, you need to
@@ -38,34 +51,80 @@ After doing this the big data file will automaticly generated in the benchmark d
 
 To run all the benchmark dataset:
 
-For java:
+### Java:
 
 ```
 $ make java
 ```
 
-For cpp:
+### CPP:
 
 ```
 $ make cpp
 ```
 
+### Python:
+
+We have three versions of the Python protobuf implementation: pure Python, cpp
+reflection, and cpp generated code. To run the benchmark for each version:
+
+#### Pure Python:
+
+```
+$ make python-pure-python
+```
+
+#### CPP reflection:
+
+```
+$ make python-cpp-reflection
+```
+
+#### CPP generated code:
+
+```
+$ make python-cpp-generated-code
+```
+
 To run a specific dataset:
 
-For java:
+### Java:
 
 ```
 $ make java-benchmark
 $ ./java-benchmark $(specific generated dataset file name) [-- $(caliper option)]
 ```
 
-For cpp:
+### CPP:
 
 ```
 $ make cpp-benchmark
 $ ./cpp-benchmark $(specific generated dataset file name)
 ```
 
+### Python:
+
+#### Pure Python:
+
+```
+$ make python-pure-python-benchmark
+$ ./python-pure-python-benchmark $(specific generated dataset file name)
+```
+
+#### CPP reflection:
+
+```
+$ make python-cpp-reflection-benchmark
+$ ./python-cpp-reflection-benchmark $(specific generated dataset file name)
+```
+
+#### CPP generated code:
+
+```
+$ make python-cpp-generated-code-benchmark
+$ ./python-cpp-generated-code-benchmark $(specific generated dataset file name)
+```
+
 ## Benchmark datasets
 
 Each data set is in the format of benchmarks.proto:

diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/benchmarks/py_benchmark.py b/benchmarks/py_benchmark.py
new file mode 100755
index 00000000..d8f13cc7
--- /dev/null
+++ b/benchmarks/py_benchmark.py
@@ -0,0 +1,115 @@
+import sys
+import os
+import timeit
+import math
+
+# The cpp generated code must be linked before importing the generated Python
+# code, so that the descriptors can be found in the descriptor pool.
+if len(sys.argv) < 2:
+  raise IOError("Need string argument \"true\" or \"false\" for whether to use cpp generated code")
+if sys.argv[1] == "true":
+  sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + "/.libs")
+  import libbenchmark_messages
+  sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + "/tmp")
+elif sys.argv[1] != "false":
+  raise IOError("Need string argument \"true\" or \"false\" for whether to use cpp generated code")
+
+import datasets.google_message1.benchmark_message1_proto2_pb2 as benchmark_message1_proto2_pb2
+import datasets.google_message1.benchmark_message1_proto3_pb2 as benchmark_message1_proto3_pb2
+import datasets.google_message2.benchmark_message2_pb2 as benchmark_message2_pb2
+import datasets.google_message3.benchmark_message3_pb2 as benchmark_message3_pb2
+import datasets.google_message4.benchmark_message4_pb2 as benchmark_message4_pb2
+import benchmarks_pb2
+
+
+def run_one_test(filename):
+  data = open(os.path.dirname(sys.argv[0]) + "/../" + filename, "rb").read()
+  benchmark_dataset = benchmarks_pb2.BenchmarkDataset()
+  benchmark_dataset.ParseFromString(data)
+  benchmark_util = Benchmark(full_iteration=len(benchmark_dataset.payload),
+                             module="py_benchmark",
+                             setup_method="init")
+  print "Message %s of dataset file %s" % \
+      (benchmark_dataset.message_name, filename)
+  benchmark_util.set_test_method("parse_from_benchmark")
+  print benchmark_util.run_benchmark(setup_method_args='"%s"' % (filename))
+  benchmark_util.set_test_method("serialize_to_benchmark")
+  print benchmark_util.run_benchmark(setup_method_args='"%s"' % (filename))
+  print ""
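+
+# The timed functions below share their inputs through module globals because
+# timeit imports and invokes them by name: init() parses the dataset and
+# pre-builds message objects once, so that parse_from_benchmark() and
+# serialize_to_benchmark() measure only the parse/serialize work itself.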
+
+def init(filename):
+  global benchmark_dataset, message_class, message_list, counter
+  message_list = []
+  counter = 0
+  data = open(os.path.dirname(sys.argv[0]) + "/../" + filename, "rb").read()
+  benchmark_dataset = benchmarks_pb2.BenchmarkDataset()
+  benchmark_dataset.ParseFromString(data)
+
+  if benchmark_dataset.message_name == "benchmarks.proto3.GoogleMessage1":
+    message_class = benchmark_message1_proto3_pb2.GoogleMessage1
+  elif benchmark_dataset.message_name == "benchmarks.proto2.GoogleMessage1":
+    message_class = benchmark_message1_proto2_pb2.GoogleMessage1
+  elif benchmark_dataset.message_name == "benchmarks.proto2.GoogleMessage2":
+    message_class = benchmark_message2_pb2.GoogleMessage2
+  elif benchmark_dataset.message_name == "benchmarks.google_message3.GoogleMessage3":
+    message_class = benchmark_message3_pb2.GoogleMessage3
+  elif benchmark_dataset.message_name == "benchmarks.google_message4.GoogleMessage4":
+    message_class = benchmark_message4_pb2.GoogleMessage4
+  else:
+    raise IOError("Message %s not found!" % (benchmark_dataset.message_name))
+
+  for one_payload in benchmark_dataset.payload:
+    temp = message_class()
+    temp.ParseFromString(one_payload)
+    message_list.append(temp)
+
+def parse_from_benchmark():
+  global counter, message_class, benchmark_dataset
+  m = message_class().ParseFromString(benchmark_dataset.payload[counter % len(benchmark_dataset.payload)])
+  counter = counter + 1
+
+def serialize_to_benchmark():
+  global counter, message_list, benchmark_dataset
+  s = message_list[counter % len(benchmark_dataset.payload)].SerializeToString()
+  counter = counter + 1
+
+
+class Benchmark:
+  def __init__(self, module=None, test_method=None,
+               setup_method=None, full_iteration=1):
+    self.full_iteration = full_iteration
+    self.module = module
+    self.test_method = test_method
+    self.setup_method = setup_method
+
+  def set_test_method(self, test_method):
+    self.test_method = test_method
+
+  def full_setup_code(self, setup_method_args=''):
+    setup_code = ""
+    setup_code += "from %s import %s\n" % (self.module, self.test_method)
+    setup_code += "from %s import %s\n" % (self.module, self.setup_method)
+    setup_code += "%s(%s)\n" % (self.setup_method, setup_method_args)
+    return setup_code
+
+  def dry_run(self, test_method_args='', setup_method_args=''):
+    return timeit.timeit(stmt="%s(%s)" % (self.test_method, test_method_args),
+                         setup=self.full_setup_code(setup_method_args),
+                         number=self.full_iteration)
+
+  def run_benchmark(self, test_method_args='', setup_method_args=''):
+    reps = self.full_iteration
+    t = self.dry_run(test_method_args, setup_method_args)
+    # If the dry run finishes in under 3 seconds, scale up the repetition
+    # count so that the timed run lasts at least roughly 3 seconds.
+    if t < 3:
+      reps = int(math.ceil(3 / t)) * self.full_iteration
+    t = timeit.timeit(stmt="%s(%s)" % (self.test_method, test_method_args),
+                      setup=self.full_setup_code(setup_method_args),
+                      number=reps)
+    return "Average time for %s: %.2f ns" % \
+        (self.test_method, 1.0 * t / reps * (10 ** 9))
+
+
+if __name__ == "__main__":
+  for i in range(2, len(sys.argv)):
+    run_one_test(sys.argv[i])

diff --git a/benchmarks/python_benchmark_messages.cc b/benchmarks/python_benchmark_messages.cc
new file mode 100644
index 00000000..55242a2a
--- /dev/null
+++ b/benchmarks/python_benchmark_messages.cc
@@ -0,0 +1,29 @@
+#include <Python.h>
+
+#include "benchmarks.pb.h"
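+// Generated headers for each benchmark dataset; the generated sources are
+// compiled into this module (see nodist_libbenchmark_messages_la_SOURCES in
+// the Makefile), making the C++ message types available to the benchmark.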
+#include "datasets/google_message1/benchmark_message1_proto2.pb.h"
+#include "datasets/google_message1/benchmark_message1_proto3.pb.h"
+#include "datasets/google_message2/benchmark_message2.pb.h"
+#include "datasets/google_message3/benchmark_message3.pb.h"
+#include "datasets/google_message4/benchmark_message4.pb.h"
+
+static PyMethodDef python_benchmark_methods[] = {
+  {NULL, NULL, 0, NULL}        /* Sentinel */
+};
+
+
+PyMODINIT_FUNC
+initlibbenchmark_messages() {
+  // Touch one descriptor of each generated message type so that they are
+  // registered in the C++ descriptor pool before the Python code runs.
+  benchmarks::BenchmarkDataset().descriptor();
+  benchmarks::proto3::GoogleMessage1().descriptor();
+  benchmarks::proto2::GoogleMessage1().descriptor();
+  benchmarks::proto2::GoogleMessage2().descriptor();
+  benchmarks::google_message3::GoogleMessage3().descriptor();
+  benchmarks::google_message4::GoogleMessage4().descriptor();
+
+  PyObject *m;
+
+  m = Py_InitModule("libbenchmark_messages", python_benchmark_methods);
+  if (m == NULL)
+    return;
+}
--
cgit v1.2.3