aboutsummaryrefslogtreecommitdiff
path: root/dev/create-release
diff options
context:
space:
mode:
Diffstat (limited to 'dev/create-release')
-rwxr-xr-x dev/create-release/generate-contributors.py 156
-rwxr-xr-x dev/create-release/releaseutils.py 94
-rwxr-xr-x dev/create-release/translate-contributors.py 45
3 files changed, 206 insertions, 89 deletions
diff --git a/dev/create-release/generate-contributors.py b/dev/create-release/generate-contributors.py
index a3b78a3eac..e8f81ccbce 100755
--- a/dev/create-release/generate-contributors.py
+++ b/dev/create-release/generate-contributors.py
@@ -26,75 +26,103 @@ from releaseutils import *
# You must set the following before use!
JIRA_API_BASE = os.environ.get("JIRA_API_BASE", "https://issues.apache.org/jira")
-START_COMMIT = os.environ.get("START_COMMIT", "37b100")
-END_COMMIT = os.environ.get("END_COMMIT", "3693ae")
-
-# If commit range is not specified, prompt the user to provide it
-if not START_COMMIT or not END_COMMIT:
- print "A commit range is required to proceed."
- if not START_COMMIT:
- START_COMMIT = raw_input("Please specify starting commit hash (inclusive): ")
- if not END_COMMIT:
- END_COMMIT = raw_input("Please specify ending commit hash (non-inclusive): ")
-
-# Verify provided arguments
-start_commit_line = get_one_line(START_COMMIT)
-end_commit_line = get_one_line(END_COMMIT)
-num_commits = num_commits_in_range(START_COMMIT, END_COMMIT)
-if not start_commit_line: sys.exit("Start commit %s not found!" % START_COMMIT)
-if not end_commit_line: sys.exit("End commit %s not found!" % END_COMMIT)
-if num_commits == 0:
- sys.exit("There are no commits in the provided range [%s, %s)" % (START_COMMIT, END_COMMIT))
+# The tags are read from the RELEASE_TAG and PREVIOUS_RELEASE_TAG environment
+# variables. The legacy START_COMMIT and END_COMMIT variables are still honored
+# as fallbacks (START_COMMIT -> release tag, END_COMMIT -> previous release tag)
+# so that existing invocations keep working.
+RELEASE_TAG = os.environ.get("RELEASE_TAG", os.environ.get("START_COMMIT", "v1.2.0-rc2"))
+PREVIOUS_RELEASE_TAG = os.environ.get(
+    "PREVIOUS_RELEASE_TAG", os.environ.get("END_COMMIT", "v1.1.0"))
+
+# If the release tags are not provided (or do not exist), keep prompting
+# the user until both of them can be resolved
+while not tag_exists(RELEASE_TAG):
+    RELEASE_TAG = raw_input("Please provide a valid release tag: ")
+while not tag_exists(PREVIOUS_RELEASE_TAG):
+    print "Please specify the previous release tag."
+    PREVIOUS_RELEASE_TAG = raw_input(
+        "For instance, if you are releasing v1.2.0, you should specify v1.1.0: ")
+
+# Collect the commits that are in the new tag but not in the old tag.
+# A commit is treated as already released when either its git hash or its
+# PR number also appears in the old tag; such commits are left out.
+print "Gathering new commits between tags %s and %s" % (PREVIOUS_RELEASE_TAG, RELEASE_TAG)
+release_commits = get_commits(RELEASE_TAG)
+previous_release_commits = get_commits(PREVIOUS_RELEASE_TAG)
+previous_release_hashes = set(c.get_hash() for c in previous_release_commits)
+# Commits without a PR number contribute nothing to the PR set
+previous_release_prs = set(
+    c.get_pr_number() for c in previous_release_commits if c.get_pr_number())
+new_commits = [
+    this_commit for this_commit in release_commits
+    if this_commit.get_hash() not in previous_release_hashes
+    and not (this_commit.get_pr_number() and
+             this_commit.get_pr_number() in previous_release_prs)]
+if len(new_commits) == 0:
+    sys.exit("There are no new commits between %s and %s!" % (PREVIOUS_RELEASE_TAG, RELEASE_TAG))
+
+
+# Prompt the user for confirmation that the commit range is correct
print "\n=================================================================================="
print "JIRA server: %s" % JIRA_API_BASE
-print "Start commit (inclusive): %s" % start_commit_line
-print "End commit (non-inclusive): %s" % end_commit_line
-print "Number of commits in this range: %s" % num_commits
+print "Release tag: %s" % RELEASE_TAG
+print "Previous release tag: %s" % PREVIOUS_RELEASE_TAG
+print "Number of commits in this range: %s" % len(new_commits)
print
-response = raw_input("Is this correct? [Y/n] ")
-if response.lower() != "y" and response:
- sys.exit("Ok, exiting")
+# Print every element of the given list on its own line, behind a short indent
+def print_indented(_list):
+    for entry in _list:
+        print " %s" % entry
+if yesOrNoPrompt("Show all commits?"):
+ print_indented(new_commits)
print "==================================================================================\n"
-
-# Find all commits within this range
-print "Gathering commits within range [%s..%s)" % (START_COMMIT, END_COMMIT)
-commits = get_one_line_commits(START_COMMIT, END_COMMIT)
-if not commits: sys.exit("Error: No commits found within this range!")
-commits = commits.split("\n")
+if not yesOrNoPrompt("Does this look correct?"):
+ sys.exit("Ok, exiting")
# Filter out special commits
releases = []
+maintenance = []
reverts = []
nojiras = []
filtered_commits = []
-def is_release(commit):
- return re.findall("\[release\]", commit.lower()) or\
- "maven-release-plugin" in commit or "CHANGES.txt" in commit
-def has_no_jira(commit):
- return not re.findall("SPARK-[0-9]+", commit.upper())
-def is_revert(commit):
- return "revert" in commit.lower()
-def is_docs(commit):
- return re.findall("docs*", commit.lower()) or "programming guide" in commit.lower()
-for c in commits:
- if not c: continue
- elif is_release(c): releases.append(c)
- elif is_revert(c): reverts.append(c)
- elif is_docs(c): filtered_commits.append(c) # docs may not have JIRA numbers
- elif has_no_jira(c): nojiras.append(c)
+# Return a truthy value if the title marks a release commit: it carries a
+# "[release]" tag, one of the release preparation phrases (both matched
+# case-insensitively), or mentions CHANGES.txt (matched case-sensitively)
+def is_release(commit_title):
+    lowered = commit_title.lower()
+    release_tags = re.findall("\[release\]", lowered)
+    if release_tags:
+        return release_tags
+    if "preparing spark release" in lowered:
+        return True
+    if "preparing development version" in lowered:
+        return True
+    return "CHANGES.txt" in commit_title
+# Return whether the title contains one of the maintenance phrases
+# (matched case-insensitively)
+def is_maintenance(commit_title):
+    lowered = commit_title.lower()
+    return any(phrase in lowered for phrase in ("maintenance", "manually close"))
+# Return whether the title lacks any SPARK-<number> reference
+# (the title is upper-cased first, so "spark-123" counts as a reference)
+def has_no_jira(commit_title):
+    return re.search("SPARK-[0-9]+", commit_title.upper()) is None
+# Return whether the word "revert" occurs anywhere in the title, in any case
+def is_revert(commit_title):
+    lowered = commit_title.lower()
+    return lowered.find("revert") != -1
+# Return a truthy value if the lower-cased title matches the "docs*" pattern
+# (the list of matches is returned) or mentions "programming guide"
+def is_docs(commit_title):
+    lowered = commit_title.lower()
+    doc_matches = re.findall("docs*", lowered)
+    if doc_matches:
+        return doc_matches
+    return "programming guide" in lowered
+for c in new_commits:
+ t = c.get_title()
+ if not t: continue
+ elif is_release(t): releases.append(c)
+ elif is_maintenance(t): maintenance.append(c)
+ elif is_revert(t): reverts.append(c)
+ elif is_docs(t): filtered_commits.append(c) # docs may not have JIRA numbers
+ elif has_no_jira(t): nojiras.append(c)
else: filtered_commits.append(c)
# Warn against ignored commits
-def print_indented(_list):
- for x in _list: print " %s" % x
-if releases or reverts or nojiras:
+if releases or maintenance or reverts or nojiras:
print "\n=================================================================================="
- if releases: print "Releases (%d)" % len(releases); print_indented(releases)
- if reverts: print "Reverts (%d)" % len(reverts); print_indented(reverts)
- if nojiras: print "No JIRA (%d)" % len(nojiras); print_indented(nojiras)
+ if releases: print "Found %d release commits" % len(releases)
+ if maintenance: print "Found %d maintenance commits" % len(maintenance)
+ if reverts: print "Found %d revert commits" % len(reverts)
+ if nojiras: print "Found %d commits with no JIRA" % len(nojiras)
+ print "* Warning: these commits will be ignored.\n"
+ if yesOrNoPrompt("Show ignored commits?"):
+ if releases: print "Release (%d)" % len(releases); print_indented(releases)
+ if maintenance: print "Maintenance (%d)" % len(maintenance); print_indented(maintenance)
+ if reverts: print "Revert (%d)" % len(reverts); print_indented(reverts)
+ if nojiras: print "No JIRA (%d)" % len(nojiras); print_indented(nojiras)
print "==================== Warning: the above commits will be ignored ==================\n"
-response = raw_input("%d commits left to process. Ok to proceed? [Y/n] " % len(filtered_commits))
-if response.lower() != "y" and response:
+prompt_msg = "%d commits left to process after filtering. Ok to proceed?" % len(filtered_commits)
+if not yesOrNoPrompt(prompt_msg):
sys.exit("Ok, exiting.")
# Keep track of warnings to tell the user at the end
@@ -123,10 +151,11 @@ jira_options = { "server": JIRA_API_BASE }
jira_client = JIRA(options = jira_options)
print "\n=========================== Compiling contributor list ==========================="
for commit in filtered_commits:
- commit_hash = re.findall("^[a-z0-9]+", commit)[0]
- issues = re.findall("SPARK-[0-9]+", commit.upper())
- author = get_author(commit_hash)
- author = unidecode.unidecode(unicode(author, "UTF-8")).strip() # guard against special characters
+ _hash = commit.get_hash()
+ title = commit.get_title()
+ issues = re.findall("SPARK-[0-9]+", title.upper())
+ author = commit.get_author()
+ date = get_date(_hash)
# If the author name is invalid, keep track of it along
# with all associated issues so we can translate it later
if is_valid_author(author):
@@ -136,9 +165,8 @@ for commit in filtered_commits:
invalid_authors[author] = set()
for issue in issues:
invalid_authors[author].add(issue)
- date = get_date(commit_hash)
- # Parse components from the commit message, if any
- commit_components = find_components(commit, commit_hash)
+ # Parse components from the commit title, if any
+ commit_components = find_components(title, _hash)
# Populate or merge an issue into author_info[author]
def populate(issue_type, components):
components = components or [CORE_COMPONENT] # assume core if no components provided
@@ -153,14 +181,14 @@ for commit in filtered_commits:
jira_issue = jira_client.issue(issue)
jira_type = jira_issue.fields.issuetype.name
jira_type = translate_issue_type(jira_type, issue, warnings)
- jira_components = [translate_component(c.name, commit_hash, warnings)\
+ jira_components = [translate_component(c.name, _hash, warnings)\
for c in jira_issue.fields.components]
all_components = set(jira_components + commit_components)
populate(jira_type, all_components)
# For docs without an associated JIRA, manually add it ourselves
- if is_docs(commit) and not issues:
+ if is_docs(title) and not issues:
populate("documentation", commit_components)
- print " Processed commit %s authored by %s on %s" % (commit_hash, author, date)
+ print " Processed commit %s authored by %s on %s" % (_hash, author, date)
print "==================================================================================\n"
# Write to contributors file ordered by author names
diff --git a/dev/create-release/releaseutils.py b/dev/create-release/releaseutils.py
index 76a10c3288..18e16bcb90 100755
--- a/dev/create-release/releaseutils.py
+++ b/dev/create-release/releaseutils.py
@@ -19,6 +19,7 @@
# This file contains helper methods used in creating a release.
import re
+import sys
from subprocess import Popen, PIPE
try:
@@ -47,20 +48,85 @@ except ImportError:
# Contributors list file name
contributors_file_name = "contributors.txt"
+# Prompt the user to answer yes or no until they do so.
+#
+# msg: the question to display; " [y/n]: " is appended to it.
+# Returns True if the user answered "y" and False if they answered "n".
+# Answers are compared case-insensitively and surrounding whitespace is
+# ignored, so "Y" and " n " are accepted. Any other answer shows the prompt again.
+def yesOrNoPrompt(msg):
+    # Loop instead of recursing so that repeated invalid answers cannot
+    # run into the interpreter's recursion limit
+    while True:
+        response = raw_input("%s [y/n]: " % msg).strip().lower()
+        if response == "y":
+            return True
+        if response == "n":
+            return False
+
# Utility functions run git commands (written with Git 1.8.5)
def run_cmd(cmd): return Popen(cmd, stdout=PIPE).communicate()[0]
-def get_author(commit_hash):
- return run_cmd(["git", "show", "--quiet", "--pretty=format:%an", commit_hash])
+# Run the given command and return whatever it wrote to stderr
+def run_cmd_error(cmd):
+    # Only stderr is piped here; stdout is not captured
+    process = Popen(cmd, stderr=PIPE)
+    _, stderr_output = process.communicate()
+    return stderr_output
def get_date(commit_hash):
return run_cmd(["git", "show", "--quiet", "--pretty=format:%cd", commit_hash])
-def get_one_line(commit_hash):
- return run_cmd(["git", "show", "--quiet", "--pretty=format:\"%h %cd %s\"", commit_hash])
-def get_one_line_commits(start_hash, end_hash):
- return run_cmd(["git", "log", "--oneline", "%s..%s" % (start_hash, end_hash)])
-def num_commits_in_range(start_hash, end_hash):
- output = run_cmd(["git", "log", "--oneline", "%s..%s" % (start_hash, end_hash)])
- lines = [line for line in output.split("\n") if line] # filter out empty lines
- return len(lines)
+# Return whether the given tag (or any other name git can resolve to a commit)
+# exists in the current repository.
+#
+# The name is resolved with `git rev-parse --verify`, which only reads the
+# repository: HEAD and the working tree are left alone. The answer is taken
+# from git's exit status rather than from searching stderr for the word
+# "error", because failures reported as "fatal: ..." do not contain that word.
+#
+# tag: the name to look up; an empty value or None is reported as missing.
+def tag_exists(tag):
+    # An empty answer from a prompt can never name a commit
+    if not tag:
+        return False
+    cmd = ["git", "rev-parse", "--verify", "--quiet", "%s^{commit}" % tag]
+    # Capture both streams so that git's output does not clutter the prompts
+    process = Popen(cmd, stdout=PIPE, stderr=PIPE)
+    process.communicate()
+    return process.returncode == 0
+
+# Plain record of the commit fields the release scripts work with:
+# the hash, the author name, the title, and (optionally) the number
+# of the pull request that the commit closes.
+class Commit:
+    def __init__(self, _hash, author, title, pr_number = None):
+        self._hash, self.author = _hash, author
+        self.title, self.pr_number = title, pr_number
+
+    # Accessors, one per field
+    def get_hash(self):
+        return self._hash
+
+    def get_author(self):
+        return self.author
+
+    def get_title(self):
+        return self.title
+
+    def get_pr_number(self):
+        return self.pr_number
+
+    # Render as "<hash> <author> <title> (Closes #<pr>)". Without a PR number
+    # the last field is empty, so the string ends with the separating space.
+    def __str__(self):
+        if self.pr_number:
+            pr_suffix = "(Closes #%s)" % self.pr_number
+        else:
+            pr_suffix = ""
+        fields = (self._hash, self.author, self.title, pr_suffix)
+        return "%s %s %s %s" % fields
+
+# Return all commits that belong to the specified tag.
+#
+# Under the hood, this runs `git log` on that tag with a custom --pretty format
+# that wraps every commit in marker strings, and parses the command output to
+# construct a list of Commit objects. The abbreviated hash (%h), author name (%an)
+# and subject (%s) come from the formatted digest. The PR number and the Github
+# username are not separate git fields, so they are extracted from the commit
+# body (%b) with a regex.
+#
+# tag: the revision handed to `git log`
+# Returns a list of Commit objects in the order in which `git log` printed them.
+# Exits the script if a commit in the output does not carry the expected markers.
+#
+# This is written using Git 1.8.5.
+def get_commits(tag):
+ # Marker strings used to delimit commits and fields in the `git log` output
+ commit_start_marker = "|=== COMMIT START MARKER ===|"
+ commit_end_marker = "|=== COMMIT END MARKER ===|"
+ field_end_marker = "|=== COMMIT FIELD END MARKER ===|"
+ # Layout per commit: <start> hash <field> author <field> title <end> body
+ log_format =\
+ commit_start_marker + "%h" +\
+ field_end_marker + "%an" +\
+ field_end_marker + "%s" +\
+ commit_end_marker + "%b"
+ output = run_cmd(["git", "log", "--quiet", "--pretty=format:" + log_format, tag])
+ commits = []
+ # Splitting on the start marker yields one chunk per commit; empty chunks are dropped
+ raw_commits = [c for c in output.split(commit_start_marker) if c]
+ for commit in raw_commits:
+ # Exactly one end marker is expected; otherwise print the chunk and exit with status 1
+ if commit.count(commit_end_marker) != 1:
+ print "Commit end marker not found in commit: "
+ for line in commit.split("\n"): print line
+ sys.exit(1)
+ # Separate commit digest from the body
+ # From the digest we extract the hash, author and the title
+ # From the body, we extract the PR number and the github username
+ [commit_digest, commit_body] = commit.split(commit_end_marker)
+ # The digest must hold exactly three fields, i.e. two field markers
+ if commit_digest.count(field_end_marker) != 2:
+ sys.exit("Unexpected format in commit: %s" % commit_digest)
+ [_hash, author, title] = commit_digest.split(field_end_marker)
+ # The PR number and github username are in the commit message
+ # itself and cannot be accessed through any Github API
+ pr_number = None
+ # Looks for text of the form "Closes #<digits> from <username>/" in the body
+ match = re.search("Closes #([0-9]+) from ([^/\\s]+)/", commit_body)
+ if match:
+ # pr_number is kept as the matched string of digits, not converted to an int
+ [pr_number, github_username] = match.groups()
+ # If the author name is not valid, use the github
+ # username so we can translate it properly later
+ # NOTE(review): github_username is only bound when the regex matched, so this
+ # check presumably sits inside `if match:` (nesting is flattened in this view)
+ if not is_valid_author(author):
+ author = github_username
+ # Guard against special characters
+ author = unidecode.unidecode(unicode(author, "UTF-8")).strip()
+ commit = Commit(_hash, author, title, pr_number)
+ commits.append(commit)
+ return commits
# Maintain a mapping for translating issue types to contributions in the release notes
# This serves an additional function of warning the user against unknown issue types
@@ -70,10 +136,13 @@ def num_commits_in_range(start_hash, end_hash):
known_issue_types = {
"bug": "bug fixes",
"build": "build fixes",
+ "dependency upgrade": "build fixes",
"improvement": "improvements",
"new feature": "new features",
"documentation": "documentation",
- "test": "test"
+ "test": "test",
+ # JIRA "task" and "sub-task" issues are credited under the same label as
+ # "improvement", so that one category is not listed under two spellings
+ "task": "improvements",
+ "sub-task": "improvements"
}
# Maintain a mapping for translating component names when creating the release notes
@@ -176,8 +245,7 @@ def get_jira_name(author, jira_client):
# Return whether the given name is in the form <First Name><space><Last Name>
def is_valid_author(author):
if not author: return False
- author_words = len(author.split(" "))
- return author_words == 2 or author_words == 3
+ return " " in author and not re.findall("[0-9]", author)
# Capitalize the first letter of each word in the given author name
def capitalize_author(author):
diff --git a/dev/create-release/translate-contributors.py b/dev/create-release/translate-contributors.py
index ef4625b003..462c21142f 100755
--- a/dev/create-release/translate-contributors.py
+++ b/dev/create-release/translate-contributors.py
@@ -37,8 +37,11 @@ from releaseutils import *
JIRA_API_BASE = os.environ.get("JIRA_API_BASE", "https://issues.apache.org/jira")
JIRA_USERNAME = os.environ.get("JIRA_USERNAME", None)
JIRA_PASSWORD = os.environ.get("JIRA_PASSWORD", None)
+GITHUB_API_TOKEN = os.environ.get("GITHUB_API_TOKEN", None)
if not JIRA_USERNAME or not JIRA_PASSWORD:
sys.exit("Both JIRA_USERNAME and JIRA_PASSWORD must be set")
+if not GITHUB_API_TOKEN:
+ sys.exit("GITHUB_API_TOKEN must be set")
# Write new contributors list to <old_file_name>.new
if not os.path.isfile(contributors_file_name):
@@ -62,7 +65,7 @@ if INTERACTIVE_MODE:
# Setup Github and JIRA clients
jira_options = { "server": JIRA_API_BASE }
jira_client = JIRA(options = jira_options, basic_auth = (JIRA_USERNAME, JIRA_PASSWORD))
-github_client = Github()
+github_client = Github(GITHUB_API_TOKEN)
# Generate candidates for the given author. This should only be called if the given author
# name does not represent a full name as this operation is somewhat expensive. Under the
@@ -94,7 +97,14 @@ def generate_candidates(author, issues):
# Then do the same for the assignee of each of the associated JIRAs
# Note that a given issue may not have an assignee, or the assignee may not have a full name
for issue in issues:
- jira_issue = jira_client.issue(issue)
+ try:
+ jira_issue = jira_client.issue(issue)
+ except JIRAError as e:
+ # Do not exit just because an issue is not found!
+ if e.status_code == 404:
+ warnings.append("Issue %s not found!" % issue)
+ continue
+ raise e
jira_assignee = jira_issue.fields.assignee
if jira_assignee:
user_name = jira_assignee.name
@@ -123,9 +133,10 @@ def generate_candidates(author, issues):
# In non-interactive mode, this script picks the first valid author name from the candidates
# If no such name exists, the original name is used (without the JIRA numbers).
print "\n========================== Translating contributor list =========================="
-for line in contributors_file:
+lines = contributors_file.readlines()
+for i, line in enumerate(lines):
author = line.split(" - ")[0]
- print "Processing author %s" % author
+ print "Processing author %s (%d/%d)" % (author, i + 1, len(lines))
if not author:
print " ERROR: Expected the following format <author> - <contributions>"
print " ERROR: Actual = %s" % line
@@ -135,30 +146,39 @@ for line in contributors_file:
candidates = generate_candidates(new_author, issues)
# Print out potential replacement candidates along with the sources, e.g.
# [X] No full name found for Github user andrewor14
+ # [X] No assignee found for SPARK-1763
# [0] Andrew Or - Full name of JIRA user andrewor14
# [1] Andrew Orso - Full name of SPARK-1444 assignee andrewor14
# [2] Andrew Ordall - Full name of SPARK-1663 assignee andrewor14
- # [X] No assignee found for SPARK-1763
- # [3] Custom
+ # [3] andrewor14 - Raw Github username
+ # [4] Custom
candidate_names = []
+ bad_prompts = [] # Prompts that can't actually be selected; print these first.
+ good_prompts = [] # Prompts that contain valid choices
for candidate, source in candidates:
if candidate == NOT_FOUND:
- print " [X] %s" % source
+ bad_prompts.append(" [X] %s" % source)
else:
index = len(candidate_names)
candidate_names.append(candidate)
- print " [%d] %s - %s" % (index, candidate, source)
- custom_index = len(candidate_names)
+ good_prompts.append(" [%d] %s - %s" % (index, candidate, source))
+ raw_index = len(candidate_names)
+ custom_index = len(candidate_names) + 1
+ for p in bad_prompts: print p
+ if bad_prompts: print " ---"
+ for p in good_prompts: print p
# In interactive mode, additionally provide "custom" option and await user response
if INTERACTIVE_MODE:
+ print " [%d] %s - Raw Github username" % (raw_index, new_author)
print " [%d] Custom" % custom_index
response = raw_input(" Your choice: ")
- while not response.isdigit() or int(response) > custom_index:
- response = raw_input(" Please enter an integer between 0 and %d: " % custom_index)
+ last_index = custom_index
+ while not response.isdigit() or int(response) > last_index:
+ response = raw_input(" Please enter an integer between 0 and %d: " % last_index)
response = int(response)
if response == custom_index:
new_author = raw_input(" Please type a custom name for this author: ")
- else:
+ elif response != raw_index:
new_author = candidate_names[response]
# In non-interactive mode, just pick the first candidate
else:
@@ -175,6 +195,7 @@ for line in contributors_file:
print " * Replacing %s with %s" % (author, new_author)
line = line.replace(author, new_author)
new_contributors_file.write(line)
+ new_contributors_file.flush()
print "==================================================================================\n"
contributors_file.close()
new_contributors_file.close()