From 75626ed79c726ed9fd96d9d143ce6b6c88413bf8 Mon Sep 17 00:00:00 2001 From: Jon Skeet Date: Fri, 8 Apr 2016 12:19:10 +0100 Subject: Add C# codegen changes to enum value names (mostly C++) Overview of changes: - A new C#-specific command-line option, legacy_enum_values to revert to the old behavior - When legacy_enum_values isn't specified, we strip the enum name as a prefix, and PascalCase the value name - A new attribute within the C# code so that we can always tell the original in-proto name Regenerating the C# code with legacy_enum_values leads to code which still compiles and works - but there's more still to do. --- src/google/protobuf/compiler/csharp/csharp_enum.cc | 20 ++++- .../protobuf/compiler/csharp/csharp_generator.cc | 3 + .../compiler/csharp/csharp_generator_unittest.cc | 20 ++++- .../protobuf/compiler/csharp/csharp_helpers.cc | 93 ++++++++++++++++++++++ .../protobuf/compiler/csharp/csharp_helpers.h | 2 + .../protobuf/compiler/csharp/csharp_options.h | 9 ++- 6 files changed, 141 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/google/protobuf/compiler/csharp/csharp_enum.cc b/src/google/protobuf/compiler/csharp/csharp_enum.cc index 9616f172..bdfcc2be 100644 --- a/src/google/protobuf/compiler/csharp/csharp_enum.cc +++ b/src/google/protobuf/compiler/csharp/csharp_enum.cc @@ -41,6 +41,7 @@ #include #include #include +#include using google::protobuf::internal::scoped_ptr; @@ -64,11 +65,24 @@ void EnumGenerator::Generate(io::Printer* printer) { "access_level", class_access_level(), "name", descriptor_->name()); printer->Indent(); + std::set used_names; for (int i = 0; i < descriptor_->value_count(); i++) { WriteEnumValueDocComment(printer, descriptor_->value(i)); - printer->Print("$name$ = $number$,\n", - "name", descriptor_->value(i)->name(), - "number", SimpleItoa(descriptor_->value(i)->number())); + string original_name = descriptor_->value(i)->name(); + string name = options()->legacy_enum_values + ? descriptor_->value(i)->name() + : GetEnumValueName(descriptor_->name(), descriptor_->value(i)->name()); + // Make sure we don't get any duplicate names due to prefix removal. + while (!used_names.insert(name).second) { + // It's possible we'll end up giving this warning multiple times, but that's better than not at all. + GOOGLE_LOG(WARNING) << "Duplicate enum value " << name << " (originally " << original_name + << ") in " << descriptor_->name() << "; adding underscore to distinguish"; + name += "_"; + } + printer->Print("[pbr::OriginalName(\"$original_name$\")] $name$ = $number$,\n", + "original_name", original_name, + "name", name, + "number", SimpleItoa(descriptor_->value(i)->number())); } printer->Outdent(); printer->Print("}\n"); diff --git a/src/google/protobuf/compiler/csharp/csharp_generator.cc b/src/google/protobuf/compiler/csharp/csharp_generator.cc index c13ed65b..d74e8c88 100644 --- a/src/google/protobuf/compiler/csharp/csharp_generator.cc +++ b/src/google/protobuf/compiler/csharp/csharp_generator.cc @@ -83,6 +83,9 @@ bool Generator::Generate( cli_options.base_namespace_specified = true; } else if (options[i].first == "internal_access") { cli_options.internal_access = true; + } else if (options[i].first == "legacy_enum_values") { + // TODO: Remove this before final release + cli_options.legacy_enum_values = true; } else { *error = "Unknown generator option: " + options[i].first; return false; diff --git a/src/google/protobuf/compiler/csharp/csharp_generator_unittest.cc b/src/google/protobuf/compiler/csharp/csharp_generator_unittest.cc index 7ef7df42..5755fee0 100644 --- a/src/google/protobuf/compiler/csharp/csharp_generator_unittest.cc +++ b/src/google/protobuf/compiler/csharp/csharp_generator_unittest.cc @@ -30,8 +30,8 @@ #include -#include #include +#include #include #include @@ -45,7 +45,23 @@ namespace compiler { namespace csharp { namespace { -// TODO(jtattermusch): add some tests. +TEST(CSharpEnumValue, PascalCasedPrefixStripping) { + EXPECT_EQ("Bar", GetEnumValueName("Foo", "BAR")); + EXPECT_EQ("BarBaz", GetEnumValueName("Foo", "BAR_BAZ")); + EXPECT_EQ("Bar", GetEnumValueName("Foo", "FOO_BAR")); + EXPECT_EQ("Bar", GetEnumValueName("Foo", "FOO__BAR")); + EXPECT_EQ("BarBaz", GetEnumValueName("Foo", "FOO_BAR_BAZ")); + EXPECT_EQ("BarBaz", GetEnumValueName("Foo", "Foo_BarBaz")); + EXPECT_EQ("Bar", GetEnumValueName("FO_O", "FOO_BAR")); + EXPECT_EQ("Bar", GetEnumValueName("FOO", "F_O_O_BAR")); + EXPECT_EQ("Bar", GetEnumValueName("Foo", "BAR")); + EXPECT_EQ("BarBaz", GetEnumValueName("Foo", "BAR_BAZ")); + EXPECT_EQ("Foo", GetEnumValueName("Foo", "FOO")); + EXPECT_EQ("Foo", GetEnumValueName("Foo", "FOO___")); + // Identifiers can't start with digits + EXPECT_EQ("_2Bar", GetEnumValueName("Foo", "FOO_2_BAR")); + EXPECT_EQ("_2", GetEnumValueName("Foo", "FOO___2")); +} } // namespace } // namespace csharp diff --git a/src/google/protobuf/compiler/csharp/csharp_helpers.cc b/src/google/protobuf/compiler/csharp/csharp_helpers.cc index 41265f9a..efd01556 100644 --- a/src/google/protobuf/compiler/csharp/csharp_helpers.cc +++ b/src/google/protobuf/compiler/csharp/csharp_helpers.cc @@ -178,6 +178,99 @@ std::string UnderscoresToPascalCase(const std::string& input) { return UnderscoresToCamelCase(input, true); } +// Convert a string which is expected to be SHOUTY_CASE (but may not be *precisely* shouty) +// into a PascalCase string. Precise rules implemented: + +// Previous input character Current character Case +// Any Non-alphanumeric Skipped +// None - first char of input Alphanumeric Upper +// Non-letter (e.g. _ or 1) Alphanumeric Upper +// Numeric Alphanumeric Upper +// Lower letter Alphanumeric Same as current +// Upper letter Alphanumeric Lower +std::string ShoutyToPascalCase(const std::string& input) { + string result; + // Simple way of implementing "always start with upper" + char previous = '_'; + for (int i = 0; i < input.size(); i++) { + char current = input[i]; + if (!ascii_isalnum(current)) { + previous = current; + continue; + } + if (!ascii_isalnum(previous)) { + result += ascii_toupper(current); + } else if (ascii_isdigit(previous)) { + result += ascii_toupper(current); + } else if (ascii_islower(previous)) { + result += current; + } else { + result += ascii_tolower(current); + } + previous = current; + } + return result; +} + +// Attempt to remove a prefix from a value, ignoring casing and skipping underscores. +// (foo, foo_bar) => bar - underscore after prefix is skipped +// (FOO, foo_bar) => bar - casing is ignored +// (foo_bar, foobarbaz) => baz - underscore in prefix is ignored +// (foobar, foo_barbaz) => baz - underscore in value is ignored +// (foo, bar) => bar - prefix isn't matched; return original value +std::string TryRemovePrefix(const std::string& prefix, const std::string& value) { + // First normalize to a lower-case no-underscores prefix to match against + std::string prefix_to_match = ""; + for (size_t i = 0; i < prefix.size(); i++) { + if (prefix[i] != '_') { + prefix_to_match += ascii_tolower(prefix[i]); + } + } + + // This keeps track of how much of value we've consumed + size_t prefix_index, value_index; + for (prefix_index = 0, value_index = 0; + prefix_index < prefix_to_match.size() && value_index < value.size(); + value_index++) { + // Skip over underscores in the value + if (value[value_index] == '_') { + continue; + } + if (ascii_tolower(value[value_index]) != prefix_to_match[prefix_index++]) { + // Failed to match the prefix - bail out early. + return value; + } + } + + // If we didn't finish looking through the prefix, we can't strip it. + if (prefix_index < prefix_to_match.size()) { + return value; + } + + // Step over any underscores after the prefix + while (value_index < value.size() && value[value_index] == '_') { + value_index++; + } + + // If there's nothing left (e.g. it was a prefix with only underscores afterwards), don't strip. + if (value_index == value.size()) { + return value; + } + + return value.substr(value_index); +} + +std::string GetEnumValueName(const std::string& enum_name, const std::string& enum_value_name) { + std::string stripped = TryRemovePrefix(enum_name, enum_value_name); + std::string result = ShoutyToPascalCase(stripped); + // Just in case we have an enum name of FOO and a value of FOO_2... make sure the returned + // string is a valid identifier. + if (ascii_isdigit(result[0])) { + result = "_" + result; + } + return result; +} + std::string ToCSharpName(const std::string& name, const FileDescriptor* file) { std::string result = GetFileNamespace(file); if (result != "") { diff --git a/src/google/protobuf/compiler/csharp/csharp_helpers.h b/src/google/protobuf/compiler/csharp/csharp_helpers.h index eaf85014..8830e957 100644 --- a/src/google/protobuf/compiler/csharp/csharp_helpers.h +++ b/src/google/protobuf/compiler/csharp/csharp_helpers.h @@ -93,6 +93,8 @@ inline std::string UnderscoresToCamelCase(const std::string& input, bool cap_nex std::string UnderscoresToPascalCase(const std::string& input); +std::string GetEnumValueName(const std::string& enum_name, const std::string& enum_value_name); + // TODO(jtattermusch): perhaps we could move this to strutil std::string StringToBase64(const std::string& input); diff --git a/src/google/protobuf/compiler/csharp/csharp_options.h b/src/google/protobuf/compiler/csharp/csharp_options.h index 426fb3b5..4079bf7f 100644 --- a/src/google/protobuf/compiler/csharp/csharp_options.h +++ b/src/google/protobuf/compiler/csharp/csharp_options.h @@ -45,7 +45,8 @@ struct Options { file_extension(".cs"), base_namespace(""), base_namespace_specified(false), - internal_access(false) { + internal_access(false), + legacy_enum_values(false) { } // Extension of the generated file. Defaults to ".cs" string file_extension; @@ -68,6 +69,12 @@ struct Options { // Whether the generated classes should have accessibility level of "internal". // Defaults to false that generates "public" classes. bool internal_access; + // By default, C# codegen now uses PascalCased enum values names, after + // removing the enum type name as a prefix (if it *is* a prefix of the value). + // Setting this option reverts to the previous behavior of just copying the + // value name specified in the .proto file, allowing gradual migration. + // This option will be removed before final release. + bool legacy_enum_values; }; } // namespace csharp -- cgit v1.2.3