diff options
author | Andrew Or <andrew@databricks.com> | 2014-12-03 19:08:29 -0800 |
---|---|---|
committer | Andrew Or <andrew@databricks.com> | 2014-12-03 19:10:07 -0800 |
commit | a4dfb4efef89f686cbf146db42c2d891fef42500 (patch) | |
tree | 8b884b0ad30bcf33bd6021bd81e5eb52a08e8c2c /dev/create-release/releaseutils.py | |
parent | 657a88835d8bf22488b53d50f75281d7dc32442e (diff) | |
download | spark-a4dfb4efef89f686cbf146db42c2d891fef42500.tar.gz spark-a4dfb4efef89f686cbf146db42c2d891fef42500.tar.bz2 spark-a4dfb4efef89f686cbf146db42c2d891fef42500.zip |
[Release] Correctly translate contributors name in release notes
This commit involves three main changes:
(1) It separates the translation of contributor names from the
generation of the contributors list. This is largely motivated
by the Github API limit; even if we exceed this limit, we should
at least be able to proceed manually as before. This is why the
translation logic is abstracted into its own script
translate-contributors.py.
(2) When we look for candidate replacements for invalid author
names, we should look for the assignees of the associated JIRAs
too. As a result, the intermediate file must keep track of these.
(3) This provides an interactive mode with which the user can
sit at the terminal and manually pick the candidate replacement
that he/she thinks makes the most sense. As before, there is a
non-interactive mode that picks the first candidate that the
script considers "valid."
TODO: We should have a known_contributors file that stores
known mappings so we don't have to go through all of this
translation every time. This is also valuable because some
contributors simply cannot be automatically translated.
Diffstat (limited to 'dev/create-release/releaseutils.py')
-rwxr-xr-x | dev/create-release/releaseutils.py | 39 |
1 files changed, 5 insertions, 34 deletions
diff --git a/dev/create-release/releaseutils.py b/dev/create-release/releaseutils.py index 0d6830b11d..76a10c3288 100755 --- a/dev/create-release/releaseutils.py +++ b/dev/create-release/releaseutils.py @@ -44,6 +44,9 @@ except ImportError: print "Install using 'sudo pip install unidecode'" sys.exit(-1) +# Contributors list file name +contributors_file_name = "contributors.txt" + # Utility functions run git commands (written with Git 1.8.5) def run_cmd(cmd): return Popen(cmd, stdout=PIPE).communicate()[0] def get_author(commit_hash): @@ -69,7 +72,8 @@ known_issue_types = { "build": "build fixes", "improvement": "improvements", "new feature": "new features", - "documentation": "documentation" + "documentation": "documentation", + "test": "test" } # Maintain a mapping for translating component names when creating the release notes @@ -182,36 +186,3 @@ def capitalize_author(author): words = [w[0].capitalize() + w[1:] for w in words if w] return " ".join(words) -# Maintain a mapping of translated author names as a cache -translated_authors = {} - -# Format the given author in a format appropriate for the contributors list. -# If the author is not an actual name, search github and JIRA for potential -# replacements and log all candidates as a warning. -def translate_author(github_author, github_client, jira_client, warnings): - if is_valid_author(github_author): - return capitalize_author(github_author) - # If the translated author is already cached, just return it - if github_author in translated_authors: - return translated_authors[github_author] - # Otherwise, author name is not found, so we need to search for an alternative name - candidates = set() - github_name = get_github_name(github_author, github_client) - jira_name = get_jira_name(github_author, jira_client) - if is_valid_author(github_name): github_name = capitalize_author(github_name) - if is_valid_author(jira_name): jira_name = capitalize_author(jira_name) - if github_name: candidates.add(github_name) - if jira_name: candidates.add(jira_name) - # Only use the github name as a replacement automatically - # The JIRA name may not make sense because it can belong to someone else - if is_valid_author(github_name): - candidates_message = " (another candidate is %s)" % jira_name if jira_name else "" - warnings.append("Replacing github user %s with %s%s" % (github_author, github_name, candidates_message)) - translated_authors[github_name] = github_name - return translated_authors[github_name] - # No direct replacement, so return the original author and list any candidates found - candidates_message = " (candidates: %s)" % nice_join(candidates) if candidates else "" - warnings.append("Unable to find a replacement for github user %s%s" % (github_author, candidates_message)) - translated_authors[github_author] = github_author - return translated_authors[github_author] - |