diff options
author | Brennon York <brennon.york@capitalone.com> | 2015-10-18 22:45:14 -0700 |
---|---|---|
committer | Josh Rosen <joshrosen@databricks.com> | 2015-10-18 22:45:27 -0700 |
commit | d3180c25d8cf0899a7238e7d24b35c5ae918cc1d (patch) | |
tree | 851119cf665da91ea8b641ccafd9378aa0db3d90 /dev/run-tests-jenkins.py | |
parent | 94c8fef296e5cdac9a93ed34acc079e51839caa7 (diff) | |
download | spark-d3180c25d8cf0899a7238e7d24b35c5ae918cc1d.tar.gz spark-d3180c25d8cf0899a7238e7d24b35c5ae918cc1d.tar.bz2 spark-d3180c25d8cf0899a7238e7d24b35c5ae918cc1d.zip |
[SPARK-7018][BUILD] Refactor dev/run-tests-jenkins into Python
This commit refactors the `run-tests-jenkins` script into Python. This refactoring was done by brennonyork in #7401; this PR contains a few minor edits from joshrosen in order to bring it up to date with other recent changes.
From the original PR description (by brennonyork):
Currently a few things are left out that could, and I think should, be smaller JIRAs after this.
1. There are still a few areas where we use environment variables where we don't need to (like `CURRENT_BLOCK`). I might get around to fixing this one in lieu of everything else, but wanted to point that out.
2. The PR tests are still written in bash. I opted to not change those and just rewrite the runner into Python. This is a great follow-on JIRA IMO.
3. All of the linting scripts are still in bash as well; it would likely do to just add those in as follow-on JIRAs too.
Closes #7401.
Author: Brennon York <brennon.york@capitalone.com>
Closes #9161 from JoshRosen/run-tests-jenkins-refactoring.
Diffstat (limited to 'dev/run-tests-jenkins.py')
-rwxr-xr-x | dev/run-tests-jenkins.py | 228 |
1 files changed, 228 insertions, 0 deletions
diff --git a/dev/run-tests-jenkins.py b/dev/run-tests-jenkins.py new file mode 100755 index 0000000000..623004310e --- /dev/null +++ b/dev/run-tests-jenkins.py @@ -0,0 +1,228 @@ +#!/usr/bin/env python2 + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from __future__ import print_function +import os +import sys +import json +import urllib2 +import functools +import subprocess + +from sparktestsupport import SPARK_HOME, ERROR_CODES +from sparktestsupport.shellutils import run_cmd + + +def print_err(msg): + """ + Given a set of arguments, will print them to the STDERR stream + """ + print(msg, file=sys.stderr) + + +def post_message_to_github(msg, ghprb_pull_id): + print("Attempting to post to Github...") + + url = "https://api.github.com/repos/apache/spark/issues/" + ghprb_pull_id + "/comments" + github_oauth_key = os.environ["GITHUB_OAUTH_KEY"] + + posted_message = json.dumps({"body": msg}) + request = urllib2.Request(url, + headers={ + "Authorization": "token %s" % github_oauth_key, + "Content-Type": "application/json" + }, + data=posted_message) + try: + response = urllib2.urlopen(request) + + if response.getcode() == 201: + print(" > Post successful.") + except urllib2.HTTPError as http_e: + print_err("Failed to post message to 
Github.") + print_err(" > http_code: %s" % http_e.code) + print_err(" > api_response: %s" % http_e.read()) + print_err(" > data: %s" % posted_message) + except urllib2.URLError as url_e: + print_err("Failed to post message to Github.") + print_err(" > urllib2_status: %s" % url_e.reason[1]) + print_err(" > data: %s" % posted_message) + + +def pr_message(build_display_name, + build_url, + ghprb_pull_id, + short_commit_hash, + commit_url, + msg, + post_msg=''): + # align the arguments properly for string formatting + str_args = (build_display_name, + msg, + build_url, + ghprb_pull_id, + short_commit_hash, + commit_url, + str(' ' + post_msg + '.') if post_msg else '.') + return '**[Test build %s %s](%sconsoleFull)** for PR %s at commit [`%s`](%s)%s' % str_args + + +def run_pr_checks(pr_tests, ghprb_actual_commit, sha1): + """ + Executes a set of pull request checks to ease development and report issues with various + components such as style, linting, dependencies, compatibilities, etc. + @return a list of messages to post back to Github + """ + # Ensure we save off the current HEAD to revert to + current_pr_head = run_cmd(['git', 'rev-parse', 'HEAD'], return_output=True).strip() + pr_results = list() + + for pr_test in pr_tests: + test_name = pr_test + '.sh' + pr_results.append(run_cmd(['bash', os.path.join(SPARK_HOME, 'dev', 'tests', test_name), + ghprb_actual_commit, sha1], + return_output=True).rstrip()) + # Ensure, after each test, that we're back on the current PR + run_cmd(['git', 'checkout', '-f', current_pr_head]) + return pr_results + + +def run_tests(tests_timeout): + """ + Runs the `dev/run-tests` script and responds with the correct error message + under the various failure scenarios. 
+ @return a tuple containing the test result code and the result note to post to Github + """ + + test_result_code = subprocess.Popen(['timeout', + tests_timeout, + os.path.join(SPARK_HOME, 'dev', 'run-tests')]).wait() + + failure_note_by_errcode = { + 1: 'executing the `dev/run-tests` script', # error to denote run-tests script failures + ERROR_CODES["BLOCK_GENERAL"]: 'some tests', + ERROR_CODES["BLOCK_RAT"]: 'RAT tests', + ERROR_CODES["BLOCK_SCALA_STYLE"]: 'Scala style tests', + ERROR_CODES["BLOCK_PYTHON_STYLE"]: 'Python style tests', + ERROR_CODES["BLOCK_R_STYLE"]: 'R style tests', + ERROR_CODES["BLOCK_DOCUMENTATION"]: 'to generate documentation', + ERROR_CODES["BLOCK_BUILD"]: 'to build', + ERROR_CODES["BLOCK_MIMA"]: 'MiMa tests', + ERROR_CODES["BLOCK_SPARK_UNIT_TESTS"]: 'Spark unit tests', + ERROR_CODES["BLOCK_PYSPARK_UNIT_TESTS"]: 'PySpark unit tests', + ERROR_CODES["BLOCK_SPARKR_UNIT_TESTS"]: 'SparkR unit tests', + ERROR_CODES["BLOCK_TIMEOUT"]: 'from timeout after a configured wait of \`%s\`' % ( + tests_timeout) + } + + if test_result_code == 0: + test_result_note = ' * This patch passes all tests.' + else: + test_result_note = ' * This patch **fails %s**.' % failure_note_by_errcode[test_result_code] + + return [test_result_code, test_result_note] + + +def main(): + # Important Environment Variables + # --- + # $ghprbActualCommit + # This is the hash of the most recent commit in the PR. + # The merge-base of this and master is the commit from which the PR was branched. + # $sha1 + # If the patch merges cleanly, this is a reference to the merge commit hash + # (e.g. "origin/pr/2606/merge"). + # If the patch does not merge cleanly, it is equal to $ghprbActualCommit. + # The merge-base of this and master in the case of a clean merge is the most recent commit + # against master. 
+ ghprb_pull_id = os.environ["ghprbPullId"] + ghprb_actual_commit = os.environ["ghprbActualCommit"] + ghprb_pull_title = os.environ["ghprbPullTitle"] + sha1 = os.environ["sha1"] + + # Marks this build as a pull request build. + os.environ["AMP_JENKINS_PRB"] = "true" + # Switch to a Maven-based build if the PR title contains "test-maven": + if "test-maven" in ghprb_pull_title: + os.environ["AMPLAB_JENKINS_BUILD_TOOL"] = "maven" + # Switch the Hadoop profile based on the PR title: + if "test-hadoop1.0" in ghprb_pull_title: + os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop1.0" + if "test-hadoop2.2" in ghprb_pull_title: + os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop2.0" + if "test-hadoop2.2" in ghprb_pull_title: + os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop2.2" + if "test-hadoop2.3" in ghprb_pull_title: + os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop2.3" + + build_display_name = os.environ["BUILD_DISPLAY_NAME"] + build_url = os.environ["BUILD_URL"] + + commit_url = "https://github.com/apache/spark/commit/" + ghprb_actual_commit + + # GitHub doesn't auto-link short hashes when submitted via the API, unfortunately. :( + short_commit_hash = ghprb_actual_commit[0:7] + + # format: http://linux.die.net/man/1/timeout + # must be less than the timeout configured on Jenkins (currently 300m) + tests_timeout = "250m" + + # Array to capture all test names to run on the pull request. These tests are represented + # by their file equivalents in the dev/tests/ directory. 
+ # + # To write a PR test: + # * the file must reside within the dev/tests directory + # * be an executable bash script + # * accept three arguments on the command line, the first being the Github PR long commit + # hash, the second the Github SHA1 hash, and the final the current PR hash + # * and, lastly, return string output to be included in the pr message output that will + # be posted to Github + pr_tests = [ + "pr_merge_ability", + "pr_public_classes" + # DISABLED (pwendell) "pr_new_dependencies" + ] + + # `bind_message_base` returns a function to generate messages for Github posting + github_message = functools.partial(pr_message, + build_display_name, + build_url, + ghprb_pull_id, + short_commit_hash, + commit_url) + + # post start message + post_message_to_github(github_message('has started'), ghprb_pull_id) + + pr_check_results = run_pr_checks(pr_tests, ghprb_actual_commit, sha1) + + test_result_code, test_result_note = run_tests(tests_timeout) + + # post end message + result_message = github_message('has finished') + result_message += '\n' + test_result_note + '\n' + result_message += '\n'.join(pr_check_results) + + post_message_to_github(result_message, ghprb_pull_id) + + sys.exit(test_result_code) + + +if __name__ == "__main__": + main() |