[SPARK-9179] [BUILD] Allows committers to specify primary author of the PR to be merged

It's common for one contributor to submit an initial version of a feature/bugfix, and for other people (mostly committers) to later fork it and add more improvements. When merging these PRs, we probably want to specify the original author as the primary author. Currently we can only do this by running

```
$ git commit --amend --author="name <email>"
```

manually right before the merge script pushes to the Apache Git repo. It would be nice if the script accepted user-specified primary author information.

Author: Cheng Lian <lian@databricks.com>

Closes #7508 from liancheng/spark-9179 and squashes the following commits:

218d88e [Cheng Lian] Allows committers to specify primary author of the PR to be merged
author: Cheng Lian <lian@databricks.com> 2015-07-19 17:37:25 +0800
committer: Cheng Lian <lian@databricks.com> 2015-07-19 17:37:25 +0800
commit: bc24289f5d54e4ff61cd75a5941338c9d946ff73 (patch)
tree: 7ead1a0401ee42e23a8e167e4b046b1b0450957e /dev
parent: 3427937ea2a4ed19142bd3d66707864879417d61 (diff)
download: spark-bc24289f5d54e4ff61cd75a5941338c9d946ff73.tar.gz
spark-bc24289f5d54e4ff61cd75a5941338c9d946ff73.tar.bz2
spark-bc24289f5d54e4ff61cd75a5941338c9d946ff73.zip
1 files changed, 13 insertions, 10 deletions
diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py
index 4a17d48d81..d586a57481 100755
--- a/dev/merge_spark_pr.py
+++ b/dev/merge_spark_pr.py
@@ -130,7 +130,10 @@ def merge_pr(pr_num, target_ref, title, body, pr_repo_desc):
                              '--pretty=format:%an <%ae>']).split("\n")
     distinct_authors = sorted(set(commit_authors),
                               key=lambda x: commit_authors.count(x), reverse=True)
-    primary_author = distinct_authors[0]
+    primary_author = raw_input(
+        "Enter primary author in the format of \"name <email>\" [%s]: " %
+        distinct_authors[0])
+
     commits = run_cmd(['git', 'log', 'HEAD..%s' % pr_branch_name,
                       '--pretty=format:%h [%an] %s']).split("\n\n")
 
@@ -281,7 +284,7 @@ def resolve_jira_issue(merge_branches, comment, default_jira_id=""):
     resolve = filter(lambda a: a['name'] == "Resolve Issue", asf_jira.transitions(jira_id))[0]
     resolution = filter(lambda r: r.raw['name'] == "Fixed", asf_jira.resolutions())[0]
     asf_jira.transition_issue(
-        jira_id, resolve["id"], fixVersions = jira_fix_versions, 
+        jira_id, resolve["id"], fixVersions = jira_fix_versions,
         comment = comment, resolution = {'id': resolution.raw['id']})
 
     print "Successfully resolved %s with fixVersions=%s!" % (jira_id, fix_versions)
@@ -300,7 +303,7 @@ def standardize_jira_ref(text):
     """
     Standardize the [SPARK-XXXXX] [MODULE] prefix
     Converts "[SPARK-XXX][mllib] Issue", "[MLLib] SPARK-XXX. Issue" or "SPARK XXX [MLLIB]: Issue" to "[SPARK-XXX] [MLLIB] Issue"
-    
+
     >>> standardize_jira_ref("[SPARK-5821] [SQL] ParquetRelation2 CTAS should check if delete is successful")
     '[SPARK-5821] [SQL] ParquetRelation2 CTAS should check if delete is successful'
     >>> standardize_jira_ref("[SPARK-4123][Project Infra][WIP]: Show new dependencies added in pull requests")
@@ -322,11 +325,11 @@ def standardize_jira_ref(text):
     """
     jira_refs = []
     components = []
-    
+
     # If the string is compliant, no need to process any further
     if (re.search(r'^\[SPARK-[0-9]{3,6}\] (\[[A-Z0-9_\s,]+\] )+\S+', text)):
         return text
-    
+
     # Extract JIRA ref(s):
     pattern = re.compile(r'(SPARK[-\s]*[0-9]{3,6})+', re.IGNORECASE)
     for ref in pattern.findall(text):
@@ -348,18 +351,18 @@ def standardize_jira_ref(text):
 
     # Assemble full text (JIRA ref(s), module(s), remaining text)
     clean_text = ' '.join(jira_refs).strip() + " " + ' '.join(components).strip() + " " + text.strip()
-    
+
     # Replace multiple spaces with a single space, e.g. if no jira refs and/or components were included
     clean_text = re.sub(r'\s+', ' ', clean_text.strip())
-    
+
     return clean_text
 
 def main():
     global original_head
-    
+
     os.chdir(SPARK_HOME)
     original_head = run_cmd("git rev-parse HEAD")[:8]
-    
+
     branches = get_json("%s/branches" % GITHUB_API_BASE)
     branch_names = filter(lambda x: x.startswith("branch-"), [x['name'] for x in branches])
     # Assumes branch names can be sorted lexicographically
@@ -448,5 +451,5 @@ if __name__ == "__main__":
     (failure_count, test_count) = doctest.testmod()
     if failure_count:
         exit(-1)
-    
+
     main()
author	Cheng Lian <lian@databricks.com>	2015-07-19 17:37:25 +0800
committer	Cheng Lian <lian@databricks.com>	2015-07-19 17:37:25 +0800
commit	bc24289f5d54e4ff61cd75a5941338c9d946ff73 (patch)
tree	7ead1a0401ee42e23a8e167e4b046b1b0450957e /dev
parent	3427937ea2a4ed19142bd3d66707864879417d61 (diff)
download	spark-bc24289f5d54e4ff61cd75a5941338c9d946ff73.tar.gz spark-bc24289f5d54e4ff61cd75a5941338c9d946ff73.tar.bz2 spark-bc24289f5d54e4ff61cd75a5941338c9d946ff73.zip