From 3783a4ca176fa915da1b9dbb577ce9fd5e69bbb4 Mon Sep 17 00:00:00 2001
From: Yilun Chong
Date: Wed, 18 Jul 2018 13:45:32 -0700
Subject: Sync internal benchmark changes

---
 benchmarks/util/result_parser.py   | 218 +++++++++++++++++++++++++++
 benchmarks/util/result_uploader.py |  94 ++++++++++++
 benchmarks/util/run_and_upload.py  | 292 -------------------------------------
 kokoro/linux/benchmark/build.sh    |   2 +-
 4 files changed, 313 insertions(+), 293 deletions(-)
 create mode 100755 benchmarks/util/result_parser.py
 create mode 100755 benchmarks/util/result_uploader.py
 delete mode 100755 benchmarks/util/run_and_upload.py

diff --git a/benchmarks/util/result_parser.py b/benchmarks/util/result_parser.py
new file mode 100755
index 00000000..a843923a
--- /dev/null
+++ b/benchmarks/util/result_parser.py
@@ -0,0 +1,218 @@
+# This import depends on the automake rule protoc_middleman, so please make
+# sure protoc_middleman has been built before running this file.
+import json
+import re
+import os.path
+# BEGIN OPENSOURCE
+import sys
+sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
+# END OPENSOURCE
+import tmp.benchmarks_pb2 as benchmarks_pb2
+
+__file_size_map = {}
+
+def __get_data_size(filename):
+  if filename[0] != '/':
+    filename = os.path.dirname(os.path.abspath(__file__)) + "/../" + filename
+  if filename in __file_size_map:
+    return __file_size_map[filename]
+  benchmark_dataset = benchmarks_pb2.BenchmarkDataset()
+  benchmark_dataset.ParseFromString(
+      open(filename).read())
+  size = 0
+  count = 0
+  for payload in benchmark_dataset.payload:
+    size += len(payload)
+    count += 1
+  __file_size_map[filename] = (size, 1.0 * size / count)
+  return size, 1.0 * size / count
+
+
+def __extract_file_name(file_name):
+  name_list = re.split("[/\.]", file_name)
+  short_file_name = ""
+  for name in name_list:
+    if name[:14] == "google_message":
+      short_file_name = name
+  return short_file_name
+
+
+__results = []
+
+
+# CPP results example:
+# [
+#   "benchmarks": [
+#     {
+#       "bytes_per_second": int,
+#       "cpu_time": int,
+#       "name": string,
+#       "time_unit": string,
+#       ...
+#     },
+#     ...
+#   ],
+#   ...
+# ]
+def __parse_cpp_result(filename):
+  if filename == "":
+    return
+  if filename[0] != '/':
+    filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
+  with open(filename) as f:
+    results = json.loads(f.read())
+    for benchmark in results["benchmarks"]:
+      data_filename = "".join(
+          re.split("(_parse_|_serialize)", benchmark["name"])[0])
+      behavior = benchmark["name"][len(data_filename) + 1:]
+      if data_filename[:2] == "BM":
+        data_filename = data_filename[3:]
+      __results.append({
+        "language": "cpp",
+        "dataFilename": data_filename,
+        "behavior": behavior,
+        "throughput": benchmark["bytes_per_second"] / 2.0 ** 20
+      })
+
+
+# Python results example:
+# [
+#   [
+#     {
+#       "filename": string,
+#       "benchmarks": {
+#         behavior: results,
+#         ...
+#       },
+#       "message_name": string
+#     },
+#     ...
+#   ], #pure-python
+#   ...
+# ]
+def __parse_python_result(filename):
+  if filename == "":
+    return
+  if filename[0] != '/':
+    filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
+  with open(filename) as f:
+    results_list = json.loads(f.read())
+    for results in results_list:
+      for result in results:
+        _, avg_size = __get_data_size(result["filename"])
+        for behavior in result["benchmarks"]:
+          __results.append({
+            "language": "python",
+            "dataFilename": __extract_file_name(result["filename"]),
+            "behavior": behavior,
+            "throughput": avg_size /
+                result["benchmarks"][behavior] * 1e9 / 2 ** 20
+          })
+
+
+# Java results example:
+# [
+#   {
+#     "id": string,
+#     "instrumentSpec": {...},
+#     "measurements": [
+#       {
+#         "weight": float,
+#         "value": {
+#           "magnitude": float,
+#           "unit": string
+#         },
+#         ...
+#       },
+#       ...
+#     ],
+#     "run": {...},
+#     "scenario": {
+#       "benchmarkSpec": {
+#         "methodName": string,
+#         "parameters": {
+#           parameter defined in the benchmark: parameter value
+#         },
+#         ...
+#       },
+#       ...
+#     }
+#
+#   },
+#   ...
+# ]
+def __parse_java_result(filename):
+  if filename == "":
+    return
+  if filename[0] != '/':
+    filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
+  with open(filename) as f:
+    results = json.loads(f.read())
+    for result in results:
+      total_weight = 0
+      total_value = 0
+      for measurement in result["measurements"]:
+        total_weight += measurement["weight"]
+        total_value += measurement["value"]["magnitude"]
+      avg_time = total_value * 1.0 / total_weight
+      total_size, _ = __get_data_size(
+          result["scenario"]["benchmarkSpec"]["parameters"]["dataFile"])
+      __results.append({
+        "language": "java",
+        "throughput": total_size / avg_time * 1e9 / 2 ** 20,
+        "behavior": result["scenario"]["benchmarkSpec"]["methodName"],
+        "dataFilename": __extract_file_name(
+            result["scenario"]["benchmarkSpec"]["parameters"]["dataFile"])
+      })
+
+
+# Go benchmark results:
+#
+# goos: linux
+# goarch: amd64
+# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Unmarshal-12   3000    705784 ns/op
+# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Marshal-12     2000    634648 ns/op
+# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Size-12        5000    244174 ns/op
+# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Clone-12        300   4120954 ns/op
+# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Merge-12        300   4108632 ns/op
+# PASS
+# ok  _/usr/local/google/home/yilunchong/mygit/protobuf/benchmarks 124.173s
+def __parse_go_result(filename):
+  if filename == "":
+    return
+  if filename[0] != '/':
+    filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
+  with open(filename) as f:
+    for line in f:
+      result_list = re.split("[\ \t]+", line)
+      if result_list[0][:9] != "Benchmark":
+        continue
+      first_slash_index = result_list[0].find('/')
+      last_slash_index = result_list[0].rfind('/')
+      full_filename = result_list[0][first_slash_index+4:last_slash_index]  # delete ../ prefix
+      total_bytes, _ = __get_data_size(full_filename)
+      behavior_with_suffix = result_list[0][last_slash_index+1:]
+      last_dash = behavior_with_suffix.rfind("-")
+      if last_dash == -1:
+        behavior = behavior_with_suffix
+      else:
+        behavior = behavior_with_suffix[:last_dash]
+      __results.append({
+        "dataFilename": __extract_file_name(full_filename),
+        "throughput": total_bytes / float(result_list[2]) * 1e9 / 2 ** 20,
+        "behavior": behavior,
+        "language": "go"
+      })
+
+def get_result_from_file(cpp_file="", java_file="", python_file="", go_file=""):
+  results = {}
+  if cpp_file != "":
+    __parse_cpp_result(cpp_file)
+  if java_file != "":
+    __parse_java_result(java_file)
+  if python_file != "":
+    __parse_python_result(python_file)
+  if go_file != "":
+    __parse_go_result(go_file)
+
+  return __results
\ No newline at end of file
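For context on how the new module is meant to be consumed: get_result_from_file
is its only public entry point, and each per-language parser appends into the
shared __results list. A minimal sketch of driving it, assuming it runs from the
benchmarks/ directory after protoc_middleman has generated tmp/benchmarks_pb2.py
(the input paths mirror the ones build.sh passes below):

  from util import result_parser

  # Every record carries the same four keys regardless of source language:
  # "language", "dataFilename", "behavior", and "throughput" (in MB/s).
  results = result_parser.get_result_from_file(
      cpp_file="../tmp/cpp_result.json",
      go_file="../tmp/go_result.txt")
  for record in results:
    print(record["language"], record["dataFilename"],
        record["behavior"], record["throughput"])

One subtlety worth noting in the C++ parser: given a Google-Benchmark-style name
like "BM_google_message1_proto2_parse_new" (an illustrative name, not taken from
the patch), the split on "_parse_"/"_serialize" yields the data-file part, the
remainder becomes the behavior ("parse_new"), and the leading "BM_" is stripped,
which is why the code checks two characters but slices off three.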
diff --git a/benchmarks/util/result_uploader.py b/benchmarks/util/result_uploader.py
new file mode 100755
index 00000000..a667da02
--- /dev/null
+++ b/benchmarks/util/result_uploader.py
@@ -0,0 +1,94 @@
+from __future__ import print_function
+from __future__ import absolute_import
+import argparse
+import os
+import re
+import copy
+import uuid
+import calendar
+import time
+import datetime
+
+from util import big_query_utils
+from util import result_parser
+
+_PROJECT_ID = 'grpc-testing'
+_DATASET = 'protobuf_benchmark_result'
+_TABLE = 'opensource_result_v2'
+_NOW = "%d%02d%02d" % (datetime.datetime.now().year,
+                       datetime.datetime.now().month,
+                       datetime.datetime.now().day)
+
+_INITIAL_TIME = calendar.timegm(time.gmtime())
+
+def get_metadata():
+  build_number = os.getenv('BUILD_NUMBER')
+  build_url = os.getenv('BUILD_URL')
+  job_name = os.getenv('JOB_NAME')
+  git_commit = os.getenv('GIT_COMMIT')
+  # actual commit is the actual head of PR that is getting tested
+  git_actual_commit = os.getenv('ghprbActualCommit')
+
+  utc_timestamp = str(calendar.timegm(time.gmtime()))
+  metadata = {'created': utc_timestamp}
+
+  if build_number:
+    metadata['buildNumber'] = build_number
+  if build_url:
+    metadata['buildUrl'] = build_url
+  if job_name:
+    metadata['jobName'] = job_name
+  if git_commit:
+    metadata['gitCommit'] = git_commit
+  if git_actual_commit:
+    metadata['gitActualCommit'] = git_actual_commit
+
+  return metadata
+
+
+def upload_result(result_list, metadata):
+  for result in result_list:
+    new_result = {}
+    new_result["metric"] = "throughput"
+    new_result["value"] = result["throughput"]
+    new_result["unit"] = "MB/s"
+    new_result["test"] = "protobuf_benchmark"
+    new_result["product_name"] = "protobuf"
+    labels_string = ""
+    for key in result:
+      labels_string += ",|%s:%s|" % (key, result[key])
+    new_result["labels"] = labels_string[1:]
+    new_result["timestamp"] = _INITIAL_TIME
+
+    bq = big_query_utils.create_big_query()
+    row = big_query_utils.make_row(str(uuid.uuid4()), new_result)
+    if not big_query_utils.insert_rows(bq, _PROJECT_ID, _DATASET,
+                                       _TABLE + "$" + _NOW,
+                                       [row]):
+      print('Error when uploading result', new_result)
+
+
+if __name__ == "__main__":
+  parser = argparse.ArgumentParser()
+  parser.add_argument("-cpp", "--cpp_input_file",
+                      help="The CPP benchmark result file's name",
+                      default="")
+  parser.add_argument("-java", "--java_input_file",
+                      help="The Java benchmark result file's name",
+                      default="")
+  parser.add_argument("-python", "--python_input_file",
+                      help="The Python benchmark result file's name",
+                      default="")
+  parser.add_argument("-go", "--go_input_file",
+                      help="The golang benchmark result file's name",
+                      default="")
+  args = parser.parse_args()
+
+  metadata = get_metadata()
+  print("uploading results...")
+  upload_result(result_parser.get_result_from_file(
+      cpp_file=args.cpp_input_file,
+      java_file=args.java_input_file,
+      python_file=args.python_input_file,
+      go_file=args.go_input_file
+  ), metadata)
\ No newline at end of file
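The detail most likely to trip up a reader of upload_result is the label
encoding: every key/value pair of a parsed record is wrapped in pipes,
comma-joined, and then the leading comma is sliced off. A small illustration
with a made-up record:

  record = {"language": "cpp", "behavior": "parse", "throughput": 114.3}
  labels_string = ""
  for key in record:
    labels_string += ",|%s:%s|" % (key, record[key])
  labels_string = labels_string[1:]
  # -> "|language:cpp|,|behavior:parse|,|throughput:114.3|"
  # (key order follows dict iteration order, which is arbitrary on Python 2,
  # so the segments may appear in any order)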
diff --git a/benchmarks/util/run_and_upload.py b/benchmarks/util/run_and_upload.py
deleted file mode 100755
index 43c9fa2d..00000000
--- a/benchmarks/util/run_and_upload.py
+++ /dev/null
@@ -1,292 +0,0 @@
-from __future__ import print_function
-from __future__ import absolute_import
-import argparse
-import os
-import re
-import copy
-import uuid
-import calendar
-import time
-from . import big_query_utils
-import datetime
-import json
-# This import depends on the automake rule protoc_middleman, please make sure
-# protoc_middleman has been built before run this file.
-import os.path, sys
-sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
-import tmp.benchmarks_pb2 as benchmarks_pb2
-from click.types import STRING
-
-_PROJECT_ID = 'grpc-testing'
-_DATASET = 'protobuf_benchmark_result'
-_TABLE = 'opensource_result_v1'
-_NOW = "%d%02d%02d" % (datetime.datetime.now().year,
-                       datetime.datetime.now().month,
-                       datetime.datetime.now().day)
-
-file_size_map = {}
-
-def get_data_size(file_name):
-  if file_name in file_size_map:
-    return file_size_map[file_name]
-  benchmark_dataset = benchmarks_pb2.BenchmarkDataset()
-  benchmark_dataset.ParseFromString(
-      open(os.path.dirname(os.path.abspath(__file__)) + "/../" + file_name).read())
-  size = 0
-  count = 0
-  for payload in benchmark_dataset.payload:
-    size += len(payload)
-    count += 1
-  file_size_map[file_name] = (size, 1.0 * size / count)
-  return size, 1.0 * size / count
-
-
-def extract_file_name(file_name):
-  name_list = re.split("[/\.]", file_name)
-  short_file_name = ""
-  for name in name_list:
-    if name[:14] == "google_message":
-      short_file_name = name
-  return short_file_name
-
-
-cpp_result = []
-python_result = []
-java_result = []
-go_result = []
-
-
-# CPP results example:
-# [
-#   "benchmarks": [
-#     {
-#       "bytes_per_second": int,
-#       "cpu_time": int,
-#       "name: string,
-#       "time_unit: string,
-#       ...
-#     },
-#     ...
-#   ],
-#   ...
-# ]
-def parse_cpp_result(filename):
-  global cpp_result
-  if filename == "":
-    return
-  if filename[0] != '/':
-    filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
-  with open(filename) as f:
-    results = json.loads(f.read())
-    for benchmark in results["benchmarks"]:
-      data_filename = "".join(
-          re.split("(_parse_|_serialize)", benchmark["name"])[0])
-      behavior = benchmark["name"][len(data_filename) + 1:]
-      cpp_result.append({
-        "language": "cpp",
-        "dataFileName": data_filename,
-        "behavior": behavior,
-        "throughput": benchmark["bytes_per_second"] / 2.0 ** 20
-      })
-
-
-# Python results example:
-# [
-#   [
-#     {
-#       "filename": string,
-#       "benchmarks": {
-#         behavior: results,
-#         ...
-#       },
-#       "message_name": STRING
-#     },
-#     ...
-#   ], #pure-python
-#   ...
-# ]
-def parse_python_result(filename):
-  global python_result
-  if filename == "":
-    return
-  if filename[0] != '/':
-    filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
-  with open(filename) as f:
-    results_list = json.loads(f.read())
-    for results in results_list:
-      for result in results:
-        _, avg_size = get_data_size(result["filename"])
-        for behavior in result["benchmarks"]:
-          python_result.append({
-            "language": "python",
-            "dataFileName": extract_file_name(result["filename"]),
-            "behavior": behavior,
-            "throughput": avg_size /
-                result["benchmarks"][behavior] * 1e9 / 2 ** 20
-          })
-
-
-# Java results example:
-# [
-#   {
-#     "id": string,
-#     "instrumentSpec": {...},
-#     "measurements": [
-#       {
-#         "weight": float,
-#         "value": {
-#           "magnitude": float,
-#           "unit": string
-#         },
-#         ...
-#       },
-#       ...
-#     ],
-#     "run": {...},
-#     "scenario": {
-#       "benchmarkSpec": {
-#         "methodName": string,
-#         "parameters": {
-#           defined parameters in the benchmark: parameters value
-#         },
-#         ...
-#       },
-#       ...
-#     }
-#
-#   },
-#   ...
-# ]
-def parse_java_result(filename):
-  global average_bytes_per_message, java_result
-  if filename == "":
-    return
-  if filename[0] != '/':
-    filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
-  with open(filename) as f:
-    results = json.loads(f.read())
-    for result in results:
-      total_weight = 0
-      total_value = 0
-      for measurement in result["measurements"]:
-        total_weight += measurement["weight"]
-        total_value += measurement["value"]["magnitude"]
-      avg_time = total_value * 1.0 / total_weight
-      total_size, _ = get_data_size(
-          result["scenario"]["benchmarkSpec"]["parameters"]["dataFile"])
-      java_result.append({
-        "language": "java",
-        "throughput": total_size / avg_time * 1e9 / 2 ** 20,
-        "behavior": result["scenario"]["benchmarkSpec"]["methodName"],
-        "dataFileName": extract_file_name(
-            result["scenario"]["benchmarkSpec"]["parameters"]["dataFile"])
-      })
-
-
-# Go benchmark results:
-#
-# goos: linux
-# goarch: amd64
-# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Unmarshal-12   3000    705784 ns/op
-# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Marshal-12     2000    634648 ns/op
-# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Size-12        5000    244174 ns/op
-# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Clone-12        300   4120954 ns/op
-# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Merge-12        300   4108632 ns/op
-# PASS
-# ok  _/usr/local/google/home/yilunchong/mygit/protobuf/benchmarks 124.173s
-def parse_go_result(filename):
-  global go_result
-  if filename == "":
-    return
-  if filename[0] != '/':
-    filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
-  with open(filename) as f:
-    for line in f:
-      result_list = re.split("[\ \t]+", line)
-      if result_list[0][:9] != "Benchmark":
-        continue
-      first_slash_index = result_list[0].find('/')
-      last_slash_index = result_list[0].rfind('/')
-      full_filename = result_list[0][first_slash_index+4:last_slash_index]  # delete ../ prefix
-      total_bytes, _ = get_data_size(full_filename)
-      behavior_with_suffix = result_list[0][last_slash_index+1:]
-      last_dash = behavior_with_suffix.rfind("-")
-      if last_dash == -1:
-        behavior = behavior_with_suffix
-      else:
-        behavior = behavior_with_suffix[:last_dash]
-      go_result.append({
-        "dataFilename": extract_file_name(full_filename),
-        "throughput": total_bytes / float(result_list[2]) * 1e9 / 2 ** 20,
-        "behavior": behavior,
-        "language": "go"
-      })
-
-
-def get_metadata():
-  build_number = os.getenv('BUILD_NUMBER')
-  build_url = os.getenv('BUILD_URL')
-  job_name = os.getenv('JOB_NAME')
-  git_commit = os.getenv('GIT_COMMIT')
-  # actual commit is the actual head of PR that is getting tested
-  git_actual_commit = os.getenv('ghprbActualCommit')
-
-  utc_timestamp = str(calendar.timegm(time.gmtime()))
-  metadata = {'created': utc_timestamp}
-
-  if build_number:
-    metadata['buildNumber'] = build_number
-  if build_url:
-    metadata['buildUrl'] = build_url
-  if job_name:
-    metadata['jobName'] = job_name
-  if git_commit:
-    metadata['gitCommit'] = git_commit
-  if git_actual_commit:
-    metadata['gitActualCommit'] = git_actual_commit
-
-  return metadata
-
-
-def upload_result(result_list, metadata):
-  for result in result_list:
-    new_result = copy.deepcopy(result)
-    new_result['metadata'] = metadata
-    bq = big_query_utils.create_big_query()
-    row = big_query_utils.make_row(str(uuid.uuid4()), new_result)
-    if not big_query_utils.insert_rows(bq, _PROJECT_ID, _DATASET,
-                                       _TABLE + "$" + _NOW,
-                                       [row]):
-      print('Error when uploading result', new_result)
-
-
-if __name__ == "__main__":
-  parser = argparse.ArgumentParser()
-  parser.add_argument("-cpp", "--cpp_input_file",
-                      help="The CPP benchmark result file's name",
-                      default="")
-  parser.add_argument("-java", "--java_input_file",
-                      help="The Java benchmark result file's name",
-                      default="")
-  parser.add_argument("-python", "--python_input_file",
-                      help="The Python benchmark result file's name",
-                      default="")
-  parser.add_argument("-go", "--go_input_file",
-                      help="The golang benchmark result file's name",
-                      default="")
-  args = parser.parse_args()
-
-  parse_cpp_result(args.cpp_input_file)
-  parse_python_result(args.python_input_file)
-  parse_java_result(args.java_input_file)
-  parse_go_result(args.go_input_file)
-
-  metadata = get_metadata()
-  print("uploading cpp results...")
-  upload_result(cpp_result, metadata)
-  print("uploading java results...")
-  upload_result(java_result, metadata)
-  print("uploading python results...")
-  upload_result(python_result, metadata)
-  print("uploading go results...")
-  upload_result(go_result, metadata)
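Note that the throughput arithmetic is the same in the deleted script and in the
new result_parser.py: a nanoseconds-per-operation timing is converted into
MiB per second. Worked through with the Unmarshal line from the sample Go output
quoted in the comments above (the dataset size here is a hypothetical figure,
standing in for whatever __get_data_size returns):

  ns_per_op = 705784.0  # Unmarshal timing from the sample Go output
  total_bytes = 84570   # hypothetical payload size, for illustration only
  throughput = total_bytes / ns_per_op * 1e9 / 2 ** 20
  # bytes/op divided by ns/op gives bytes/ns; *1e9 scales to bytes/s;
  # /2**20 converts to MiB/s, roughly 114 MB/s for these numbers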
diff --git a/kokoro/linux/benchmark/build.sh b/kokoro/linux/benchmark/build.sh
index af5b299e..3463ba9b 100755
--- a/kokoro/linux/benchmark/build.sh
+++ b/kokoro/linux/benchmark/build.sh
@@ -86,7 +86,7 @@ echo "benchmarking java..."
 
 # upload result to bq
 make python_add_init
-env LD_LIBRARY_PATH="$oldpwd/src/.libs" python util/run_and_upload.py -cpp="../tmp/cpp_result.json" -java="../tmp/java_result.json" \
+env LD_LIBRARY_PATH="$oldpwd/src/.libs" python -m util.result_uploader -cpp="../tmp/cpp_result.json" -java="../tmp/java_result.json" \
 -python="../tmp/python_result.json" -go="../tmp/go_result.txt"
 
 cd $oldpwd
-- 
cgit v1.2.3
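A closing note on where the uploaded rows land: the table name is suffixed with
a BigQuery day-partition decorator, _TABLE + "$" + _NOW, so each run writes into
the partition for its own calendar day. A sketch reusing the script's own
formula (the example date matches the commit date above):

  import datetime

  _TABLE = 'opensource_result_v2'
  now = datetime.datetime.now()
  _NOW = "%d%02d%02d" % (now.year, now.month, now.day)
  # On 2018-07-18 this prints "opensource_result_v2$20180718"
  print(_TABLE + "$" + _NOW)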