aboutsummaryrefslogtreecommitdiff
path: root/dev/create-release/generate-contributors.py
diff options
context:
space:
mode:
Diffstat (limited to 'dev/create-release/generate-contributors.py')
-rwxr-xr-xdev/create-release/generate-contributors.py52
1 files changed, 31 insertions, 21 deletions
diff --git a/dev/create-release/generate-contributors.py b/dev/create-release/generate-contributors.py
index 99c29ef9ff..a3b78a3eac 100755
--- a/dev/create-release/generate-contributors.py
+++ b/dev/create-release/generate-contributors.py
@@ -26,8 +26,6 @@ from releaseutils import *
# You must set the following before use!
JIRA_API_BASE = os.environ.get("JIRA_API_BASE", "https://issues.apache.org/jira")
-JIRA_USERNAME = os.environ.get("JIRA_USERNAME", None)
-JIRA_PASSWORD = os.environ.get("JIRA_PASSWORD", None)
START_COMMIT = os.environ.get("START_COMMIT", "37b100")
END_COMMIT = os.environ.get("END_COMMIT", "3693ae")
@@ -40,8 +38,6 @@ if not START_COMMIT or not END_COMMIT:
END_COMMIT = raw_input("Please specify ending commit hash (non-inclusive): ")
# Verify provided arguments
-if not JIRA_USERNAME: sys.exit("JIRA_USERNAME must be provided")
-if not JIRA_PASSWORD: sys.exit("JIRA_PASSWORD must be provided")
start_commit_line = get_one_line(START_COMMIT)
end_commit_line = get_one_line(END_COMMIT)
num_commits = num_commits_in_range(START_COMMIT, END_COMMIT)
@@ -60,14 +56,6 @@ if response.lower() != "y" and response:
sys.exit("Ok, exiting")
print "==================================================================================\n"
-# Setup JIRA and github clients. We use two JIRA clients, one with authentication
-# and one without, because authentication is slow and required only when we query
-# JIRA user details but not Spark issues
-jira_options = { "server": JIRA_API_BASE }
-jira_client = JIRA(options = jira_options)
-jira_client_auth = JIRA(options = jira_options, basic_auth = (JIRA_USERNAME, JIRA_PASSWORD))
-github_client = Github()
-
# Find all commits within this range
print "Gathering commits within range [%s..%s)" % (START_COMMIT, END_COMMIT)
commits = get_one_line_commits(START_COMMIT, END_COMMIT)
@@ -105,13 +93,17 @@ if releases or reverts or nojiras:
if reverts: print "Reverts (%d)" % len(reverts); print_indented(reverts)
if nojiras: print "No JIRA (%d)" % len(nojiras); print_indented(nojiras)
print "==================== Warning: the above commits will be ignored ==================\n"
-response = raw_input("%d commits left to process. Ok to proceed? [y/N] " % len(filtered_commits))
-if response.lower() != "y":
+response = raw_input("%d commits left to process. Ok to proceed? [Y/n] " % len(filtered_commits))
+if response.lower() != "y" and response:
sys.exit("Ok, exiting.")
# Keep track of warnings to tell the user at the end
warnings = []
+# Mapping from the invalid author name to its associated JIRA issues
+# E.g. andrewor14 -> set("SPARK-2413", "SPARK-3551", "SPARK-3471")
+invalid_authors = {}
+
# Populate a map that groups issues and components by author
# It takes the form: Author name -> { Contribution type -> Spark components }
# For instance,
@@ -127,16 +119,23 @@ warnings = []
# }
#
author_info = {}
+jira_options = { "server": JIRA_API_BASE }
+jira_client = JIRA(options = jira_options)
print "\n=========================== Compiling contributor list ==========================="
for commit in filtered_commits:
commit_hash = re.findall("^[a-z0-9]+", commit)[0]
issues = re.findall("SPARK-[0-9]+", commit.upper())
- # Translate the author in case the github username is not an actual name
- # Also guard against any special characters used in the name
- # Note the JIRA client we use here must have authentication enabled
author = get_author(commit_hash)
- author = unidecode.unidecode(unicode(author, "UTF-8"))
- author = translate_author(author, github_client, jira_client_auth, warnings)
+ author = unidecode.unidecode(unicode(author, "UTF-8")).strip() # guard against special characters
+ # If the author name is invalid, keep track of it along
+ # with all associated issues so we can translate it later
+ if is_valid_author(author):
+ author = capitalize_author(author)
+ else:
+ if author not in invalid_authors:
+ invalid_authors[author] = set()
+ for issue in issues:
+ invalid_authors[author].add(issue)
date = get_date(commit_hash)
# Parse components from the commit message, if any
commit_components = find_components(commit, commit_hash)
@@ -147,7 +146,7 @@ for commit in filtered_commits:
author_info[author] = {}
if issue_type not in author_info[author]:
author_info[author][issue_type] = set()
- for component in all_components:
+ for component in components:
author_info[author][issue_type].add(component)
# Find issues and components associated with this commit
for issue in issues:
@@ -168,7 +167,6 @@ print "=========================================================================
# Each line takes the format "Author name - semi-colon delimited contributions"
# e.g. Andrew Or - Bug fixes in Windows, Core, and Web UI; improvements in Core
# e.g. Tathagata Das - Bug fixes and new features in Streaming
-contributors_file_name = "contributors.txt"
contributors_file = open(contributors_file_name, "w")
authors = author_info.keys()
authors.sort()
@@ -192,11 +190,23 @@ for author in authors:
# Do not use python's capitalize() on the whole string to preserve case
assert contribution
contribution = contribution[0].capitalize() + contribution[1:]
+ # If the author name is invalid, use an intermediate format that
+ # can be translated through translate-contributors.py later
+ # E.g. andrewor14/SPARK-3425/SPARK-1157/SPARK-6672
+ if author in invalid_authors and invalid_authors[author]:
+ author = author + "/" + "/".join(invalid_authors[author])
line = "%s - %s" % (author, contribution)
contributors_file.write(line + "\n")
contributors_file.close()
print "Contributors list is successfully written to %s!" % contributors_file_name
+# Prompt the user to translate author names if necessary
+if invalid_authors:
+ warnings.append("Found the following invalid authors:")
+ for a in invalid_authors:
+ warnings.append("\t%s" % a)
+ warnings.append("Please run './translate-contributors.py' to translate them.")
+
# Log any warnings encountered in the process
if warnings:
print "\n============ Warnings encountered while creating the contributor list ============"