From 193af9f4c1ebfeec4c9322fc4bbb2a513267d345 Mon Sep 17 00:00:00 2001 From: Yilun Chong Date: Wed, 30 May 2018 17:06:45 -0700 Subject: Add proto2 to proto3 util --- benchmarks/Makefile.am | 45 +++++++++-- benchmarks/util/data_proto2_to_proto3_util.h | 63 +++++++++++++++ benchmarks/util/gogo_data_scrubber.cc | 39 +-------- benchmarks/util/proto3_data_stripper.cc | 74 +++++++++++++++++ benchmarks/util/protoc-gen-gogoproto.cc | 4 +- benchmarks/util/protoc-gen-proto2_to_proto3.cc | 108 +++++++++++++++++++++++++ benchmarks/util/schema_proto2_to_proto3_util.h | 62 +++++++++++++- 7 files changed, 349 insertions(+), 46 deletions(-) create mode 100644 benchmarks/util/data_proto2_to_proto3_util.h create mode 100644 benchmarks/util/proto3_data_stripper.cc create mode 100644 benchmarks/util/protoc-gen-proto2_to_proto3.cc (limited to 'benchmarks') diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am index 5c27bbb4..9f609228 100644 --- a/benchmarks/Makefile.am +++ b/benchmarks/Makefile.am @@ -349,11 +349,11 @@ gogo_proto_middleman: protoc-gen-gogoproto oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I$(srcdir) -I$(top_srcdir) --plugin=protoc-gen-gogoproto --gogoproto_out=$$oldpwd/tmp/gogo_proto $(benchmarks_protoc_inputs) $(benchmarks_protoc_inputs_benchmark_wrapper) $(benchmarks_protoc_inputs_proto2) ) touch gogo_proto_middleman -new_data = $$(for data in $(all_data); do echo "tmp$${data\#$(srcdir)}"; done | xargs) +gogo_data = $$(for data in $(all_data); do echo "tmp/gogo_data$${data\#$(srcdir)}"; done | xargs) generate_gogo_data: protoc_middleman protoc_middleman2 gogo-data-scrubber - mkdir -p `dirname $(new_data)` - ./gogo-data-scrubber $(all_data) $(new_data) + mkdir -p `dirname $(gogo_data)` + ./gogo-data-scrubber $(all_data) $(gogo_data) touch generate_gogo_data make_tmp_dir_gogo: @@ -408,8 +408,6 @@ gogoslick_protoc_middleman: make_tmp_dir_gogo $(top_srcdir)/src/protoc$(EXEEXT) oldpwd=`pwd` && ( cd $(srcdir)/tmp/gogo_proto && 
$$oldpwd/../src/protoc$(EXEEXT) -I. -I$$oldpwd/$(top_srcdir)/src --gogoslick_out=$$oldpwd/tmp/gogoslick $(benchmarks_protoc_inputs_proto2_message4) ) touch gogoslick_protoc_middleman -gogo_data = $$(find . -type f -name "dataset.*.pb" -path "./tmp/*") - generate-gogo-benchmark-code: @echo '#! /bin/bash' > generate-gogo-benchmark-code @echo 'cp $(srcdir)/go/go_benchmark_test.go tmp/$$1/benchmark_code/$$1_benchmark1_test.go' >> generate-gogo-benchmark-code @@ -453,7 +451,7 @@ gogoslick: gogoslick_protoc_middleman generate_gogo_data gogo-benchmark generat ############ UTIL RULES BEGIN ############ -bin_PROGRAMS += protoc-gen-gogoproto gogo-data-scrubber +bin_PROGRAMS += protoc-gen-gogoproto gogo-data-scrubber protoc-gen-proto2_to_proto3 proto3-data-stripper protoc_gen_gogoproto_LDADD = $(top_srcdir)/src/libprotobuf.la $(top_srcdir)/src/libprotoc.la protoc_gen_gogoproto_SOURCES = util/protoc-gen-gogoproto.cc @@ -468,9 +466,40 @@ nodist_gogo_data_scrubber_SOURCES = \ $(benchmarks_protoc_outputs_proto2) \ $(benchmarks_protoc_outputs_proto2_header) \ $(benchmarks_protoc_outputs_header) + +protoc_gen_proto2_to_proto3_LDADD = $(top_srcdir)/src/libprotobuf.la $(top_srcdir)/src/libprotoc.la +protoc_gen_proto2_to_proto3_SOURCES = util/protoc-gen-proto2_to_proto3.cc +protoc_gen_proto2_to_proto3_CPPFLAGS = -I$(top_srcdir)/src -I$(srcdir)/cpp -I$(srcdir)/util + +proto3_data_stripper_LDADD = $(top_srcdir)/src/libprotobuf.la +proto3_data_stripper_SOURCES = util/proto3_data_stripper.cc +proto3_data_stripper_CPPFLAGS = -I$(top_srcdir)/src -I$(srcdir)/cpp -I$(srcdir)/util +util/proto3_data_stripper-proto3_data_stripper.$(OBJEXT): $(benchmarks_protoc_outputs) $(benchmarks_protoc_outputs_proto2) $(benchmarks_protoc_outputs_header) $(benchmarks_protoc_outputs_proto2_header) +nodist_proto3_data_stripper_SOURCES = \ + $(benchmarks_protoc_outputs) \ + $(benchmarks_protoc_outputs_proto2) \ + $(benchmarks_protoc_outputs_proto2_header) \ + $(benchmarks_protoc_outputs_header) + ############ 
UTIL RULES END ############ +############ PROTO3 PREPARATION BEGIN ############# + +proto3_proto_middleman: protoc-gen-proto2_to_proto3 + mkdir -p "tmp/proto3_proto" + oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I$(srcdir) -I$(top_srcdir) --plugin=protoc-gen-proto2_to_proto3 --proto2_to_proto3_out=$$oldpwd/tmp/proto3_proto $(benchmarks_protoc_inputs) $(benchmarks_protoc_inputs_benchmark_wrapper) $(benchmarks_protoc_inputs_proto2) ) + touch proto3_proto_middleman + +proto3_data = $$(for data in $(all_data); do echo "tmp/proto3_data$${data\#$(srcdir)}"; done | xargs) + +generate_proto3_data: protoc_middleman protoc_middleman2 proto3-data-stripper + mkdir -p `dirname $(proto3_data)` + ./proto3-data-stripper $(all_data) $(proto3_data) + touch generate_proto3_data + +############ PROTO3 PREPARATION END ############# + MAINTAINERCLEANFILES = \ Makefile.in @@ -513,7 +542,9 @@ CLEANFILES = \ gogoslick_protoc_middleman \ gogoslick \ gogo-benchmark \ - gogo/cpp_no_group/cpp_benchmark.* + gogo/cpp_no_group/cpp_benchmark.* \ + proto3_proto_middleman \ + generate_proto3_data clean-local: diff --git a/benchmarks/util/data_proto2_to_proto3_util.h b/benchmarks/util/data_proto2_to_proto3_util.h new file mode 100644 index 00000000..7207efde --- /dev/null +++ b/benchmarks/util/data_proto2_to_proto3_util.h @@ -0,0 +1,63 @@ +#ifndef PROTOBUF_BENCHMARKS_UTIL_DATA_PROTO2_TO_PROTO3_UTIL_H_ +#define PROTOBUF_BENCHMARKS_UTIL_DATA_PROTO2_TO_PROTO3_UTIL_H_ + +#include "google/protobuf/message.h" +#include "google/protobuf/descriptor.h" + +using google::protobuf::FieldDescriptor; +using google::protobuf::Message; +using google::protobuf::Reflection; + +namespace google { +namespace protobuf { +namespace util { + +class DataStripper { + public: + void StripMessage(Message *message) { + std::vector set_fields; + const Reflection* reflection = message->GetReflection(); + reflection->ListFields(*message, &set_fields); + + for (size_t i = 0; i < set_fields.size(); i++) { + 
const FieldDescriptor* field = set_fields[i]; + if (ShouldBeClear(field)) { + reflection->ClearField(message, field); + } + if (field->type() == FieldDescriptor::TYPE_MESSAGE) { + if (field->is_repeated()) { + for (int j = 0; j < reflection->FieldSize(*message, field); j++) { + StripMessage(reflection->MutableRepeatedMessage(message, field, j)); + } + } else { + StripMessage(reflection->MutableMessage(message, field)); + } + } + } + + reflection->MutableUnknownFields(message)->Clear(); + } + private: + virtual bool ShouldBeClear(const FieldDescriptor *field) = 0; +}; + +class GogoDataStripper : public DataStripper { + private: + virtual bool ShouldBeClear(const FieldDescriptor *field) { + return field->type() == FieldDescriptor::TYPE_GROUP; + } +}; + +class Proto3DataStripper : public DataStripper { + private: + virtual bool ShouldBeClear(const FieldDescriptor *field) { + return field->type() == FieldDescriptor::TYPE_GROUP || + field->is_extension(); + } +}; + +} // namespace util +} // namespace protobuf +} // namespace google + +#endif // PROTOBUF_BENCHMARKS_UTIL_DATA_PROTO2_TO_PROTO3_UTIL_H_ diff --git a/benchmarks/util/gogo_data_scrubber.cc b/benchmarks/util/gogo_data_scrubber.cc index fb9af6e2..9ef57b0d 100644 --- a/benchmarks/util/gogo_data_scrubber.cc +++ b/benchmarks/util/gogo_data_scrubber.cc @@ -4,43 +4,11 @@ #include "datasets/google_message2/benchmark_message2.pb.h" #include "datasets/google_message3/benchmark_message3.pb.h" #include "datasets/google_message4/benchmark_message4.pb.h" - -#include "google/protobuf/message.h" -#include "google/protobuf/descriptor.h" +#include "data_proto2_to_proto3_util.h" #include -using google::protobuf::FieldDescriptor; -using google::protobuf::Message; -using google::protobuf::Reflection; - - -class DataGroupStripper { - public: - static void StripMessage(Message *message) { - std::vector set_fields; - const Reflection* reflection = message->GetReflection(); - reflection->ListFields(*message, &set_fields); - - for 
(size_t i = 0; i < set_fields.size(); i++) { - const FieldDescriptor* field = set_fields[i]; - if (field->type() == FieldDescriptor::TYPE_GROUP) { - reflection->ClearField(message, field); - } - if (field->type() == FieldDescriptor::TYPE_MESSAGE) { - if (field->is_repeated()) { - for (int j = 0; j < reflection->FieldSize(*message, field); j++) { - StripMessage(reflection->MutableRepeatedMessage(message, field, j)); - } - } else { - StripMessage(reflection->MutableMessage(message, field)); - } - } - } - - reflection->MutableUnknownFields(message)->Clear(); - } -}; +using google::protobuf::util::GogoDataStripper; std::string ReadFile(const std::string& name) { std::ifstream file(name.c_str()); @@ -91,7 +59,8 @@ int main(int argc, char *argv[]) { for (int i = 0; i < dataset.payload_size(); i++) { message->ParseFromString(dataset.payload(i)); - DataGroupStripper::StripMessage(message); + GogoDataStripper stripper; + stripper.StripMessage(message); dataset.set_payload(i, message->SerializeAsString()); } diff --git a/benchmarks/util/proto3_data_stripper.cc b/benchmarks/util/proto3_data_stripper.cc new file mode 100644 index 00000000..3096c4c1 --- /dev/null +++ b/benchmarks/util/proto3_data_stripper.cc @@ -0,0 +1,74 @@ +#include "benchmarks.pb.h" +#include "datasets/google_message1/proto2/benchmark_message1_proto2.pb.h" +#include "datasets/google_message1/proto3/benchmark_message1_proto3.pb.h" +#include "datasets/google_message2/benchmark_message2.pb.h" +#include "datasets/google_message3/benchmark_message3.pb.h" +#include "datasets/google_message4/benchmark_message4.pb.h" +#include "data_proto2_to_proto3_util.h" + +#include + +using google::protobuf::util::Proto3DataStripper; + +std::string ReadFile(const std::string& name) { + std::ifstream file(name.c_str()); + GOOGLE_CHECK(file.is_open()) << "Couldn't find file '" + << name + << "', please make sure you are running this command from the benchmarks" + << " directory.\n"; + return 
std::string((std::istreambuf_iterator(file)), + std::istreambuf_iterator()); +} + +int main(int argc, char *argv[]) { + if (argc % 2 == 0 || argc == 1) { + std::cerr << "Usage: [input_files] [output_file_names] where " << + "input_files are one to one mapping to output_file_names." << + std::endl; + return 1; + } + + for (int i = argc / 2; i > 0; i--) { + const std::string &input_file = argv[i]; + const std::string &output_file = argv[i + argc / 2]; + + std::cerr << "Generating " << input_file + << " to " << output_file << std::endl; + benchmarks::BenchmarkDataset dataset; + Message* message; + std::string dataset_payload = ReadFile(input_file); + GOOGLE_CHECK(dataset.ParseFromString(dataset_payload)) + << "Can' t parse data file " << input_file; + + if (dataset.message_name() == "benchmarks.proto3.GoogleMessage1") { + message = new benchmarks::proto3::GoogleMessage1; + } else if (dataset.message_name() == "benchmarks.proto2.GoogleMessage1") { + message = new benchmarks::proto2::GoogleMessage1; + } else if (dataset.message_name() == "benchmarks.proto2.GoogleMessage2") { + message = new benchmarks::proto2::GoogleMessage2; + } else if (dataset.message_name() == + "benchmarks.google_message3.GoogleMessage3") { + message = new benchmarks::google_message3::GoogleMessage3; + } else if (dataset.message_name() == + "benchmarks.google_message4.GoogleMessage4") { + message = new benchmarks::google_message4::GoogleMessage4; + } else { + std::cerr << "Unknown message type: " << dataset.message_name(); + exit(1); + } + + for (int i = 0; i < dataset.payload_size(); i++) { + message->ParseFromString(dataset.payload(i)); + Proto3DataStripper stripper; + stripper.StripMessage(message); + dataset.set_payload(i, message->SerializeAsString()); + } + + std::ofstream ofs(output_file); + ofs << dataset.SerializeAsString(); + ofs.close(); + } + + + return 0; +} diff --git a/benchmarks/util/protoc-gen-gogoproto.cc b/benchmarks/util/protoc-gen-gogoproto.cc index bfa6a5e5..9c1b3d04 100644 
// Returns |filename| without its trailing ".protodevel" or ".proto"
// extension. A name that ends in neither suffix is returned unchanged.
//
// The suffix is only inspected when the name is at least as long as the
// suffix; computing `size() - 11` on a shorter name (e.g. "a.proto") would
// wrap around and make std::string::substr throw std::out_of_range.
std::string StripProto(std::string filename) {
  static const char* const kSuffixes[] = {".protodevel", ".proto"};
  const size_t suffix_count = sizeof(kSuffixes) / sizeof(kSuffixes[0]);

  for (size_t k = 0; k < suffix_count; ++k) {
    const std::string suffix(kSuffixes[k]);
    if (filename.size() < suffix.size()) {
      continue;
    }
    const size_t stem_length = filename.size() - suffix.size();
    if (filename.compare(stem_length, suffix.size(), suffix) == 0) {
      filename.erase(stem_length);
      return filename;
    }
  }
  return filename;
}
+ +class Proto2ToProto3Generator : public CodeGenerator { + public: + virtual bool GenerateAll(const std::vector& files, + const string& parameter, + GeneratorContext* context, + string* error) const { + for (int i = 0; i < files.size(); i++) { + for (auto file : files) { + bool can_generate = + (new_pool_.FindFileByName(file->name()) == nullptr); + for (int j = 0; j < file->dependency_count(); j++) { + can_generate &= (new_pool_.FindFileByName( + file->dependency(j)->name()) != nullptr); + } + for (int j = 0; j < file->public_dependency_count(); j++) { + can_generate &= (new_pool_.FindFileByName( + file->public_dependency(j)->name()) != nullptr); + } + for (int j = 0; j < file->weak_dependency_count(); j++) { + can_generate &= (new_pool_.FindFileByName( + file->weak_dependency(j)->name()) != nullptr); + } + if (can_generate) { + Generate(file, parameter, context, error); + break; + } + } + } + + return true; + } + + virtual bool Generate(const FileDescriptor* file, + const string& parameter, + GeneratorContext* context, + string* error) const { + FileDescriptorProto new_file; + file->CopyTo(&new_file); + SchemaGroupStripper::StripFile(file, &new_file); + + EnumScrubber enum_scrubber; + enum_scrubber.ScrubFile(&new_file); + ExtensionStripper::StripFile(&new_file); + FieldScrubber::ScrubFile(&new_file); + new_file.set_syntax("proto3"); + + string filename = file->name(); + string basename = StripProto(filename); + + std::vector> option_pairs; + ParseGeneratorParameter(parameter, &option_pairs); + + std::unique_ptr output( + context->Open(basename + ".proto")); + string content = new_pool_.BuildFile(new_file)->DebugString(); + Printer printer(output.get(), '$'); + printer.WriteRaw(content.c_str(), content.size()); + + return true; + } +}; + +} // namespace compiler +} // namespace protobuf +} // namespace google + +int main(int argc, char* argv[]) { + google::protobuf::compiler::Proto2ToProto3Generator generator; + return google::protobuf::compiler::PluginMain(argc, 
argv, &generator); +} diff --git a/benchmarks/util/schema_proto2_to_proto3_util.h b/benchmarks/util/schema_proto2_to_proto3_util.h index 089012dd..8cbcbc40 100644 --- a/benchmarks/util/schema_proto2_to_proto3_util.h +++ b/benchmarks/util/schema_proto2_to_proto3_util.h @@ -74,10 +74,10 @@ class SchemaGroupStripper { }; -class SchemaAddZeroEnumValue { +class EnumScrubber { public: - SchemaAddZeroEnumValue() + EnumScrubber() : total_added_(0) { } @@ -130,6 +130,64 @@ class SchemaAddZeroEnumValue { int total_added_; }; +class ExtensionStripper { + public: + static void StripFile(FileDescriptorProto *file) { + for (int i = 0; i < file->mutable_message_type()->size(); i++) { + StripMessage(file->mutable_message_type(i)); + } + file->mutable_extension()->Clear(); + } + private: + static void StripMessage(DescriptorProto *message_type) { + message_type->mutable_extension()->Clear(); + message_type->clear_extension_range(); + for (int i = 0; i < message_type->mutable_nested_type()->size(); i++) { + StripMessage(message_type->mutable_nested_type(i)); + } + } +}; + + +class FieldScrubber { + public: + static void ScrubFile(FileDescriptorProto *file) { + for (int i = 0; i < file->mutable_message_type()->size(); i++) { + ScrubMessage(file->mutable_message_type(i)); + } + for (int i = 0; i < file->mutable_extension()->size(); i++) { + file->mutable_extension(i)->clear_default_value(); + if (ShouldClearLabel(file->mutable_extension(i))) { + file->mutable_extension(i)->clear_label(); + } + } + } + private: + static bool ShouldClearLabel(const FieldDescriptorProto *field) { + return field->label() == FieldDescriptorProto::LABEL_OPTIONAL || + field->label() == FieldDescriptorProto::LABEL_REQUIRED; + } + + static void ScrubMessage(DescriptorProto *message_type) { + message_type->mutable_extension()->Clear(); + for (int i = 0; i < message_type->mutable_extension()->size(); i++) { + message_type->mutable_extension(i)->clear_default_value(); + if 
(ShouldClearLabel(message_type->mutable_extension(i))) { + message_type->mutable_extension(i)->clear_label(); + } + } + for (int i = 0; i < message_type->mutable_field()->size(); i++) { + message_type->mutable_field(i)->clear_default_value(); + if (ShouldClearLabel(message_type->mutable_field(i))) { + message_type->mutable_field(i)->clear_label(); + } + } + for (int i = 0; i < message_type->mutable_nested_type()->size(); i++) { + ScrubMessage(message_type->mutable_nested_type(i)); + } + } +}; + } // namespace util } // namespace protobuf } // namespace google -- cgit v1.2.3