SPARK-2596 A tool for mirroring github pull requests on JIRA.

For a bunch of reasons we should automatically populate a JIRA with information about new pull requests when they arrive. I've written a small python script to do this that we can run from Jenkins every 5 or 10 minutes to keep things in sync. Author: Patrick Wendell <pwendell@gmail.com> Closes #1496 from pwendell/github-integration and squashes the following commits: 55ad226 [Patrick Wendell] Small fix afda547 [Patrick Wendell] Use sequence instead of dictiory for JIRA's 3e18cc1 [Patrick Wendell] Small edits 84c5606 [Patrick Wendell] SPARK-2596 A tool for mirroring github pull requests on JIRA.
author: Patrick Wendell <pwendell@gmail.com> 2014-07-19 18:19:08 -0700
committer: Patrick Wendell <pwendell@gmail.com> 2014-07-19 18:19:08 -0700
commit: 49e472744951d875627d78b0d6e93cd139232929 (patch)
tree: d4521af5c517e327e78af5d8517b24ef2958d37b /dev
parent: 1efb3698b6cf39a80683b37124d2736ebf3c9d9a (diff)
download: spark-49e472744951d875627d78b0d6e93cd139232929.tar.gz
spark-49e472744951d875627d78b0d6e93cd139232929.tar.bz2
spark-49e472744951d875627d78b0d6e93cd139232929.zip
1 files changed, 141 insertions, 0 deletions
diff --git a/dev/github_jira_sync.py b/dev/github_jira_sync.py
new file mode 100755
index 0000000000..4b0e266bbe
--- /dev/null
+++ b/dev/github_jira_sync.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Utility for updating JIRA's with information about Github pull requests
+
+import json
+import os
+import re
+import sys
+import urllib2
+
+try:
+    import jira.client
+except ImportError:
+    print "This tool requires the jira-python library"
+    print "Install using 'sudo pip install jira-python'"
+    sys.exit(-1)
+
+# User facing configs
+GITHUB_API_BASE = os.environ.get("GITHUB_API_BASE", "https://api.github.com/repos/apache/spark")
+JIRA_API_BASE = os.environ.get("JIRA_API_BASE", "https://issues.apache.org/jira")
+JIRA_USERNAME = os.environ.get("JIRA_USERNAME", "apachespark")
+JIRA_PASSWORD = os.environ.get("JIRA_PASSWORD", "XXX")
+# Maximum number of updates to perform in one run
+MAX_UPDATES = int(os.environ.get("MAX_UPDATES", "100000"))
+# Cut-off for oldest PR on which to comment. Useful for avoiding
+# "notification overload" when running for the first time.
+MIN_COMMENT_PR = int(os.environ.get("MIN_COMMENT_PR", "1496"))
+
+# File used as an opitimization to store maximum previously seen PR
+# Used mostly because accessing ASF JIRA is slow, so we want to avoid checking
+# the state of JIRA's that are tied to PR's we've already looked at.
+MAX_FILE = ".github-jira-max"
+
+def get_url(url):
+    try:
+        return urllib2.urlopen(url)
+    except urllib2.HTTPError as e:
+        print "Unable to fetch URL, exiting: %s" % url
+        sys.exit(-1)
+
+def get_json(urllib_response):
+    return json.load(urllib_response)
+
+# Return a list of (JIRA id, JSON dict) tuples:
+# e.g. [('SPARK-1234', {.. json ..}), ('SPARK-5687', {.. json ..})}
+def get_jira_prs():
+    result = []
+    has_next_page = True
+    page_num = 0
+    while has_next_page:
+	page = get_url(GITHUB_API_BASE + "/pulls?page=%s&per_page=100" % page_num)
+	page_json = get_json(page)
+
+	for pull in page_json:
+	    jiras = re.findall("SPARK-[0-9]{4,5}", pull['title'])
+	    for jira in jiras:
+		result = result + [(jira,  pull)]
+
+	# Check if there is another page
+	link_header = filter(lambda k: k.startswith("Link"), page.info().headers)[0]
+	if not "next"in link_header:
+	    has_next_page = False
+	else:
+	    page_num = page_num + 1
+    return result
+
+def set_max_pr(max_val):
+    f = open(MAX_FILE, 'w')
+    f.write("%s" % max_val)
+    f.close()
+    print "Writing largest PR number seen: %s" % max_val
+
+def get_max_pr():
+    if os.path.exists(MAX_FILE):
+        result = int(open(MAX_FILE, 'r').read())
+        print "Read largest PR number previously seen: %s" % result
+        return result
+    else:
+        return 0
+
+jira_client = jira.client.JIRA({'server': JIRA_API_BASE},
+                                basic_auth=(JIRA_USERNAME, JIRA_PASSWORD))
+
+jira_prs = get_jira_prs()
+
+previous_max = get_max_pr()
+print "Retrieved %s JIRA PR's from Github" % len(jira_prs)
+jira_prs = [(k, v) for k, v in jira_prs if int(v['number']) > previous_max]
+print "%s PR's remain after excluding visted ones" % len(jira_prs)
+
+num_updates = 0
+considered = []
+for issue, pr in sorted(jira_prs, key=lambda (k, v): int(v['number'])):
+    if num_updates >= MAX_UPDATES:
+      break
+    pr_num = int(pr['number'])
+
+    print "Checking issue %s" % issue
+    considered = considered + [pr_num]
+
+    url = pr['html_url']
+    title = "[Github] Pull Request #%s (%s)" % (pr['number'], pr['user']['login']) 
+  
+    existing_links = map(lambda l: l.raw['object']['url'], jira_client.remote_links(issue))
+    if url in existing_links:
+        continue
+
+    icon = {"title": "Pull request #%s" % pr['number'], 
+      "url16x16": "https://assets-cdn.github.com/favicon.ico"}
+    destination = {"title": title, "url": url, "icon": icon}
+    # For all possible fields see:
+    # https://developer.atlassian.com/display/JIRADEV/Fields+in+Remote+Issue+Links     
+    # application = {"name": "Github pull requests", "type": "org.apache.spark.jira.github"} 
+    jira_client.add_remote_link(issue, destination)
+    
+    comment = "User '%s' has created a pull request for this issue:" % pr['user']['login']
+    comment = comment + ("\n%s" % pr['html_url'])
+    if pr_num >= MIN_COMMENT_PR:
+        jira_client.add_comment(issue, comment)
+    
+    print "Added link %s <-> PR #%s" % (issue, pr['number'])
+    num_updates = num_updates + 1
+
+if len(considered) > 0:
+    set_max_pr(max(considered))
author	Patrick Wendell <pwendell@gmail.com>	2014-07-19 18:19:08 -0700
committer	Patrick Wendell <pwendell@gmail.com>	2014-07-19 18:19:08 -0700
commit	49e472744951d875627d78b0d6e93cd139232929 (patch)
tree	d4521af5c517e327e78af5d8517b24ef2958d37b /dev
parent	1efb3698b6cf39a80683b37124d2736ebf3c9d9a (diff)
download	spark-49e472744951d875627d78b0d6e93cd139232929.tar.gz spark-49e472744951d875627d78b0d6e93cd139232929.tar.bz2 spark-49e472744951d875627d78b0d6e93cd139232929.zip