#!/usr/bin/env python

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# This script automates the process of creating release notes.

import os
import re
import sys

# NOTE(review): the helpers used below that are not defined in this file
# (tag_exists, get_commits, yesOrNoPrompt, JIRA, ...) are presumably provided
# by this star import -- confirm against releaseutils.
from releaseutils import *

# You must set the following before use!
# Each value can be overridden through the environment variable of the same name.
JIRA_API_BASE = os.environ.get("JIRA_API_BASE", "https://issues.apache.org/jira")
RELEASE_TAG = os.environ.get("RELEASE_TAG", "v1.2.0-rc2")
PREVIOUS_RELEASE_TAG = os.environ.get("PREVIOUS_RELEASE_TAG", "v1.1.0")

# `raw_input` only exists on Python 2; Python 3 renamed it to `input`.
# Resolve the line-reading prompt once so the loops below work on both.
try:
    _read_line = raw_input  # noqa: F821
except NameError:
    _read_line = input

# If the release tags are not provided, prompt the user to provide them.
# Keep asking until tag_exists() accepts the answer.
while not tag_exists(RELEASE_TAG):
    RELEASE_TAG = _read_line("Please provide a valid release tag: ")
while not tag_exists(PREVIOUS_RELEASE_TAG):
    print("Please specify the previous release tag.")
    PREVIOUS_RELEASE_TAG = _read_line(
        "For instance, if you are releasing v1.2.0, you should specify v1.1.0: ")

# Gather commits found in the new tag but not in the old tag.
# This filters commits based on both the git hash and the PR number.
# If either is present in the old tag, then we ignore the commit.
print("Gathering new commits between tags %s and %s" % (PREVIOUS_RELEASE_TAG, RELEASE_TAG))
release_commits = get_commits(RELEASE_TAG)
previous_release_commits = get_commits(PREVIOUS_RELEASE_TAG)

# Index the previous release by hash and by PR number so that a commit that
# was cherry-picked (different hash, same PR) is still recognised as old.
previous_release_hashes = set()
previous_release_prs = set()
for old_commit in previous_release_commits:
    previous_release_hashes.add(old_commit.get_hash())
    old_pr_number = old_commit.get_pr_number()
    if old_pr_number:
        previous_release_prs.add(old_pr_number)

new_commits = []
for this_commit in release_commits:
    this_hash = this_commit.get_hash()
    this_pr_number = this_commit.get_pr_number()
    if this_hash in previous_release_hashes:
        continue
    if this_pr_number and this_pr_number in previous_release_prs:
        continue
    new_commits.append(this_commit)
if not new_commits:
    sys.exit("There are no new commits between %s and %s!" % (PREVIOUS_RELEASE_TAG, RELEASE_TAG))

# Prompt the user for confirmation that the commit range is correct
print("\n==================================================================================")
print("JIRA server: %s" % JIRA_API_BASE)
print("Release tag: %s" % RELEASE_TAG)
print("Previous release tag: %s" % PREVIOUS_RELEASE_TAG)
print("Number of commits in this range: %s" % len(new_commits))
# A bare `print` is a no-op expression on Python 3; print an explicit empty
# string so the blank separator line is emitted on both Python 2 and 3.
print("")


def print_indented(_list):
    """Print every element of ``_list`` on its own line, indented."""
    for x in _list:
        print(" %s" % x)


if yesOrNoPrompt("Show all commits?"):
    print_indented(new_commits)
print("==================================================================================\n")
if not yesOrNoPrompt("Does this look correct?"):
    sys.exit("Ok, exiting")

# Filter out special commits
releases = []
maintenance = []
reverts = []
nojiras = []
filtered_commits = []


def is_release(commit_title):
    """Return True if the title looks like a release-management commit.

    Matches "[release]", "preparing spark release" and "preparing development
    version" case-insensitively, and "CHANGES.txt" case-sensitively.
    """
    lowered = commit_title.lower()
    # Raw string: "\[" is an invalid escape sequence in a normal string literal.
    return (re.search(r"\[release\]", lowered) is not None or
            "preparing spark release" in lowered or
            "preparing development version" in lowered or
            "CHANGES.txt" in commit_title)


def is_maintenance(commit_title):
    """Return True if the title mentions "maintenance" or "manually close"."""
    lowered = commit_title.lower()
    return "maintenance" in lowered or "manually close" in lowered


def has_no_jira(commit_title):
    """Return True if the title contains no SPARK-<number> issue key."""
    return re.search("SPARK-[0-9]+", commit_title.upper()) is None


def is_revert(commit_title):
    """Return True if the title contains "revert" (case-insensitive)."""
    return "revert" in commit_title.lower()


def is_docs(commit_title):
    """Return True if the title looks like a documentation commit.

    NOTE(review): the pattern "docs*" matches any title containing "doc",
    so words such as "docker" also match -- confirm this is intended.
    """
    lowered = commit_title.lower()
    return re.search("docs*", lowered) is not None or "programming guide" in lowered


# Classify each commit; the order of the checks decides which bucket wins
# when a title matches more than one category.
for c in new_commits:
    t = c.get_title()
    if not t:
        continue
    elif is_release(t):
        releases.append(c)
    elif is_maintenance(t):
        maintenance.append(c)
    elif is_revert(t):
        reverts.append(c)
    elif is_docs(t):
        filtered_commits.append(c)  # docs may not have JIRA numbers
    elif has_no_jira(t):
        nojiras.append(c)
    else:
        filtered_commits.append(c)

# Warn against ignored commits
if releases or maintenance or reverts or nojiras:
    print("\n==================================================================================")
    if releases:
        print("Found %d release commits" % len(releases))
    if maintenance:
        print("Found %d maintenance commits" % len(maintenance))
    if reverts:
        print("Found %d revert commits" % len(reverts))
    if nojiras:
        print("Found %d commits with no JIRA" % len(nojiras))
    print("* Warning: these commits will be ignored.\n")
    if yesOrNoPrompt("Show ignored commits?"):
        if releases:
            print("Release (%d)" % len(releases))
            print_indented(releases)
        if maintenance:
            print("Maintenance (%d)" % len(maintenance))
            print_indented(maintenance)
        if reverts:
            print("Revert (%d)" % len(reverts))
            print_indented(reverts)
        if nojiras:
            print("No JIRA (%d)" % len(nojiras))
            print_indented(nojiras)
    print("==================== Warning: the above commits will be ignored ==================\n")
prompt_msg = "%d commits left to process after filtering. Ok to proceed?" % len(filtered_commits)
if not yesOrNoPrompt(prompt_msg):
    sys.exit("Ok, exiting.")

# Keep track of warnings to tell the user at the end
warnings = []

# Mapping from the invalid author name to its associated JIRA issues
# E.g. andrewor14 -> set("SPARK-2413", "SPARK-3551", "SPARK-3471")
invalid_authors = {}

# Populate a map that groups issues and components by author
# It takes the form: Author name -> { Contribution type -> Spark components }
# For instance,
# {
#   'Andrew Or': {
#     'bug fixes': ['windows', 'core', 'web ui'],
#     'improvements': ['core']
#   },
#   'Tathagata Das' : {
#     'bug fixes': ['streaming']
#     'new feature': ['streaming']
#   }
# }
#
author_info = {}
jira_options = {"server": JIRA_API_BASE}
jira_client = JIRA(options=jira_options)


def populate(issue_type, components):
    """Populate or merge an issue into author_info[author].

    Reads the module-level ``author`` of the commit currently being processed
    at call time, so it is defined once here instead of once per commit.
    """
    components = components or [CORE_COMPONENT]  # assume core if no components provided
    author_info.setdefault(author, {}).setdefault(issue_type, set()).update(components)


print("\n=========================== Compiling contributor list ===========================")
for commit in filtered_commits:
    _hash = commit.get_hash()
    title = commit.get_title()
    issues = re.findall("SPARK-[0-9]+", title.upper())
    author = commit.get_author()
    date = get_date(_hash)
    # If the author name is invalid, keep track of it along
    # with all associated issues so we can translate it later
    if is_valid_author(author):
        author = capitalize_author(author)
    else:
        invalid_authors.setdefault(author, set()).update(issues)
    # Parse components from the commit title, if any
    commit_components = find_components(title, _hash)
    # Find issues and components associated with this commit
    for issue in issues:
        try:
            jira_issue = jira_client.issue(issue)
            jira_type = jira_issue.fields.issuetype.name
            jira_type = translate_issue_type(jira_type, issue, warnings)
            jira_components = [translate_component(jc.name, _hash, warnings)
                               for jc in jira_issue.fields.components]
            all_components = set(jira_components + commit_components)
            populate(jira_type, all_components)
        except Exception as e:
            # Best effort: a failed lookup skips this issue but keeps the run going.
            # Format explicitly so Python 2 does not print a tuple.
            print("Unexpected error: %s" % e)
    # For docs without an associated JIRA, manually add it ourselves
    if is_docs(title) and not issues:
        populate("documentation", commit_components)
    print(" Processed commit %s authored by %s on %s" % (_hash, author, date))
print("==================================================================================\n")

# Write to contributors file ordered by author names
# Each line takes the format " * Author name -- semi-colon delimited contributions"
# e.g. * Andrew Or -- Bug fixes in Windows, Core, and Web UI; improvements in Core
# e.g. * Tathagata Das -- Bug fixes and new features in Streaming
# `with` guarantees the file is closed even if formatting an entry raises.
with open(contributors_file_name, "w") as contributors_file:
    # sorted() works on both Python 2 lists and Python 3 dict views,
    # unlike calling .sort() on the result of .keys().
    for author in sorted(author_info):
        contribution = ""
        components = set()
        issue_types = set()
        for issue_type, comps in author_info[author].items():
            components.update(comps)
            issue_types.add(issue_type)
        # If there is only one component, mention it only once
        # e.g. Bug fixes, improvements in MLlib
        if len(components) == 1:
            contribution = "%s in %s" % (nice_join(issue_types), next(iter(components)))
        # Otherwise, group contributions by issue types instead of modules
        # e.g. Bug fixes in MLlib, Core, and Streaming; documentation in YARN
        else:
            contributions = ["%s in %s" % (issue_type, nice_join(comps))
                             for issue_type, comps in author_info[author].items()]
            contribution = "; ".join(contributions)
        # Do not use python's capitalize() on the whole string to preserve case
        assert contribution
        contribution = contribution[0].capitalize() + contribution[1:]
        # If the author name is invalid, use an intermediate format that
        # can be translated through translate-contributors.py later
        # E.g. andrewor14/SPARK-3425/SPARK-1157/SPARK-6672
        if author in invalid_authors and invalid_authors[author]:
            # Sort the issue keys so the output is stable from run to run
            # (set iteration order is not).
            author = author + "/" + "/".join(sorted(invalid_authors[author]))
        # Only the author name is written; the line below is the full format
        # described in the comment above.
        # line = " * %s -- %s" % (author, contribution)
        line = author
        contributors_file.write(line + "\n")
print("Contributors list is successfully written to %s!" % contributors_file_name)

# Prompt the user to translate author names if necessary
if invalid_authors:
    warnings.append("Found the following invalid authors:")
    for a in invalid_authors:
        warnings.append("\t%s" % a)
    warnings.append("Please run './translate-contributors.py' to translate them.")

# Log any warnings encountered in the process
if warnings:
    print("\n============ Warnings encountered while creating the contributor list ============")
    for w in warnings:
        print(w)
    print("Please correct these in the final contributors list at %s." % contributors_file_name)
    print("==================================================================================\n")