aboutsummaryrefslogtreecommitdiff
path: root/dev/create-release/releaseutils.py
diff options
context:
space:
mode:
authorAndrew Or <andrew@databricks.com>2014-12-03 19:08:29 -0800
committerAndrew Or <andrew@databricks.com>2014-12-03 19:10:07 -0800
commita4dfb4efef89f686cbf146db42c2d891fef42500 (patch)
tree8b884b0ad30bcf33bd6021bd81e5eb52a08e8c2c /dev/create-release/releaseutils.py
parent657a88835d8bf22488b53d50f75281d7dc32442e (diff)
downloadspark-a4dfb4efef89f686cbf146db42c2d891fef42500.tar.gz
spark-a4dfb4efef89f686cbf146db42c2d891fef42500.tar.bz2
spark-a4dfb4efef89f686cbf146db42c2d891fef42500.zip
[Release] Correctly translate contributor names in release notes
This commit involves three main changes: (1) It separates the translation of contributor names from the generation of the contributors list. This is largely motivated by the GitHub API rate limit; even if we exceed this limit, we should at least be able to proceed manually as before. This is why the translation logic is abstracted into its own script, translate-contributors.py. (2) When we look for candidate replacements for invalid author names, we should also look at the assignees of the associated JIRAs. As a result, the intermediate file must keep track of these. (3) It provides an interactive mode in which the user can sit at the terminal and manually pick the candidate replacement that they think makes the most sense. As before, there is a non-interactive mode that picks the first candidate that the script considers "valid". TODO: We should have a known_contributors file that stores known mappings so we don't have to go through all of this translation every time. This is also valuable because some contributors simply cannot be automatically translated.
Diffstat (limited to 'dev/create-release/releaseutils.py')
-rwxr-xr-xdev/create-release/releaseutils.py39
1 files changed, 5 insertions, 34 deletions
diff --git a/dev/create-release/releaseutils.py b/dev/create-release/releaseutils.py
index 0d6830b11d..76a10c3288 100755
--- a/dev/create-release/releaseutils.py
+++ b/dev/create-release/releaseutils.py
@@ -44,6 +44,9 @@ except ImportError:
print "Install using 'sudo pip install unidecode'"
sys.exit(-1)
+# Contributors list file name
+contributors_file_name = "contributors.txt"
+
# Utility functions run git commands (written with Git 1.8.5)
def run_cmd(cmd): return Popen(cmd, stdout=PIPE).communicate()[0]
def get_author(commit_hash):
@@ -69,7 +72,8 @@ known_issue_types = {
"build": "build fixes",
"improvement": "improvements",
"new feature": "new features",
- "documentation": "documentation"
+ "documentation": "documentation",
+ "test": "test"
}
# Maintain a mapping for translating component names when creating the release notes
@@ -182,36 +186,3 @@ def capitalize_author(author):
words = [w[0].capitalize() + w[1:] for w in words if w]
return " ".join(words)
-# Maintain a mapping of translated author names as a cache
-translated_authors = {}
-
-# Format the given author in a format appropriate for the contributors list.
-# If the author is not an actual name, search github and JIRA for potential
-# replacements and log all candidates as a warning.
-def translate_author(github_author, github_client, jira_client, warnings):
- if is_valid_author(github_author):
- return capitalize_author(github_author)
- # If the translated author is already cached, just return it
- if github_author in translated_authors:
- return translated_authors[github_author]
- # Otherwise, author name is not found, so we need to search for an alternative name
- candidates = set()
- github_name = get_github_name(github_author, github_client)
- jira_name = get_jira_name(github_author, jira_client)
- if is_valid_author(github_name): github_name = capitalize_author(github_name)
- if is_valid_author(jira_name): jira_name = capitalize_author(jira_name)
- if github_name: candidates.add(github_name)
- if jira_name: candidates.add(jira_name)
- # Only use the github name as a replacement automatically
- # The JIRA name may not make sense because it can belong to someone else
- if is_valid_author(github_name):
- candidates_message = " (another candidate is %s)" % jira_name if jira_name else ""
- warnings.append("Replacing github user %s with %s%s" % (github_author, github_name, candidates_message))
- translated_authors[github_name] = github_name
- return translated_authors[github_name]
- # No direct replacement, so return the original author and list any candidates found
- candidates_message = " (candidates: %s)" % nice_join(candidates) if candidates else ""
- warnings.append("Unable to find a replacement for github user %s%s" % (github_author, candidates_message))
- translated_authors[github_author] = github_author
- return translated_authors[github_author]
-