dev/run-tests-jenkins.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226

#!/usr/bin/env python2

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import print_function
import os
import sys
import json
import urllib2
import functools
import subprocess

from sparktestsupport import SPARK_HOME, ERROR_CODES
from sparktestsupport.shellutils import run_cmd


def print_err(msg):
    """
    Given a set of arguments, will print them to the STDERR stream
    """
    print(msg, file=sys.stderr)


def post_message_to_github(msg, ghprb_pull_id):
    print("Attempting to post to Github...")

    url = "https://api.github.com/repos/apache/spark/issues/" + ghprb_pull_id + "/comments"
    github_oauth_key = os.environ["GITHUB_OAUTH_KEY"]

    posted_message = json.dumps({"body": msg})
    request = urllib2.Request(url,
                              headers={
                                  "Authorization": "token %s" % github_oauth_key,
                                  "Content-Type": "application/json"
                              },
                              data=posted_message)
    try:
        response = urllib2.urlopen(request)

        if response.getcode() == 201:
            print(" > Post successful.")
    except urllib2.HTTPError as http_e:
        print_err("Failed to post message to Github.")
        print_err(" > http_code: %s" % http_e.code)
        print_err(" > api_response: %s" % http_e.read())
        print_err(" > data: %s" % posted_message)
    except urllib2.URLError as url_e:
        print_err("Failed to post message to Github.")
        print_err(" > urllib2_status: %s" % url_e.reason[1])
        print_err(" > data: %s" % posted_message)


def pr_message(build_display_name,
               build_url,
               ghprb_pull_id,
               short_commit_hash,
               commit_url,
               msg,
               post_msg=''):
    # align the arguments properly for string formatting
    str_args = (build_display_name,
                msg,
                build_url,
                ghprb_pull_id,
                short_commit_hash,
                commit_url,
                str(' ' + post_msg + '.') if post_msg else '.')
    return '**[Test build %s %s](%stestReport)** for PR %s at commit [`%s`](%s)%s' % str_args


def run_pr_checks(pr_tests, ghprb_actual_commit, sha1):
    """
    Executes a set of pull request checks to ease development and report issues with various
    components such as style, linting, dependencies, compatibilities, etc.
    @return a list of messages to post back to Github
    """
    # Ensure we save off the current HEAD to revert to
    current_pr_head = run_cmd(['git', 'rev-parse', 'HEAD'], return_output=True).strip()
    pr_results = list()

    for pr_test in pr_tests:
        test_name = pr_test + '.sh'
        pr_results.append(run_cmd(['bash', os.path.join(SPARK_HOME, 'dev', 'tests', test_name),
                                   ghprb_actual_commit, sha1],
                                  return_output=True).rstrip())
        # Ensure, after each test, that we're back on the current PR
        run_cmd(['git', 'checkout', '-f', current_pr_head])
    return pr_results


def run_tests(tests_timeout):
    """
    Runs the `dev/run-tests` script and responds with the correct error message
    under the various failure scenarios.
    @return a tuple containing the test result code and the result note to post to Github
    """

    test_result_code = subprocess.Popen(['timeout',
                                         tests_timeout,
                                         os.path.join(SPARK_HOME, 'dev', 'run-tests')]).wait()

    failure_note_by_errcode = {
        1: 'executing the `dev/run-tests` script',  # error to denote run-tests script failures
        ERROR_CODES["BLOCK_GENERAL"]: 'some tests',
        ERROR_CODES["BLOCK_RAT"]: 'RAT tests',
        ERROR_CODES["BLOCK_SCALA_STYLE"]: 'Scala style tests',
        ERROR_CODES["BLOCK_JAVA_STYLE"]: 'Java style tests',
        ERROR_CODES["BLOCK_PYTHON_STYLE"]: 'Python style tests',
        ERROR_CODES["BLOCK_R_STYLE"]: 'R style tests',
        ERROR_CODES["BLOCK_DOCUMENTATION"]: 'to generate documentation',
        ERROR_CODES["BLOCK_BUILD"]: 'to build',
        ERROR_CODES["BLOCK_BUILD_TESTS"]: 'build dependency tests',
        ERROR_CODES["BLOCK_MIMA"]: 'MiMa tests',
        ERROR_CODES["BLOCK_SPARK_UNIT_TESTS"]: 'Spark unit tests',
        ERROR_CODES["BLOCK_PYSPARK_UNIT_TESTS"]: 'PySpark unit tests',
        ERROR_CODES["BLOCK_PYSPARK_PIP_TESTS"]: 'PySpark pip packaging tests',
        ERROR_CODES["BLOCK_SPARKR_UNIT_TESTS"]: 'SparkR unit tests',
        ERROR_CODES["BLOCK_TIMEOUT"]: 'from timeout after a configured wait of \`%s\`' % (
            tests_timeout)
    }

    if test_result_code == 0:
        test_result_note = ' * This patch passes all tests.'
    else:
        test_result_note = ' * This patch **fails %s**.' % failure_note_by_errcode[test_result_code]

    return [test_result_code, test_result_note]


def main():
    # Important Environment Variables
    # ---
    # $ghprbActualCommit
    #   This is the hash of the most recent commit in the PR.
    #   The merge-base of this and master is the commit from which the PR was branched.
    # $sha1
    #   If the patch merges cleanly, this is a reference to the merge commit hash
    #     (e.g. "origin/pr/2606/merge").
    #   If the patch does not merge cleanly, it is equal to $ghprbActualCommit.
    #   The merge-base of this and master in the case of a clean merge is the most recent commit
    #     against master.
    ghprb_pull_id = os.environ["ghprbPullId"]
    ghprb_actual_commit = os.environ["ghprbActualCommit"]
    ghprb_pull_title = os.environ["ghprbPullTitle"]
    sha1 = os.environ["sha1"]

    # Marks this build as a pull request build.
    os.environ["AMP_JENKINS_PRB"] = "true"
    # Switch to a Maven-based build if the PR title contains "test-maven":
    if "test-maven" in ghprb_pull_title:
        os.environ["AMPLAB_JENKINS_BUILD_TOOL"] = "maven"
    # Switch the Hadoop profile based on the PR title:
    if "test-hadoop2.6" in ghprb_pull_title:
        os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop2.6"
    if "test-hadoop2.7" in ghprb_pull_title:
        os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop2.7"

    build_display_name = os.environ["BUILD_DISPLAY_NAME"]
    build_url = os.environ["BUILD_URL"]

    commit_url = "https://github.com/apache/spark/commit/" + ghprb_actual_commit

    # GitHub doesn't auto-link short hashes when submitted via the API, unfortunately. :(
    short_commit_hash = ghprb_actual_commit[0:7]

    # format: http://linux.die.net/man/1/timeout
    # must be less than the timeout configured on Jenkins (currently 300m)
    tests_timeout = "250m"

    # Array to capture all test names to run on the pull request. These tests are represented
    # by their file equivalents in the dev/tests/ directory.
    #
    # To write a PR test:
    #   * the file must reside within the dev/tests directory
    #   * be an executable bash script
    #   * accept three arguments on the command line, the first being the Github PR long commit
    #     hash, the second the Github SHA1 hash, and the final the current PR hash
    #   * and, lastly, return string output to be included in the pr message output that will
    #     be posted to Github
    pr_tests = [
        "pr_merge_ability",
        "pr_public_classes"
    ]

    # `bind_message_base` returns a function to generate messages for Github posting
    github_message = functools.partial(pr_message,
                                       build_display_name,
                                       build_url,
                                       ghprb_pull_id,
                                       short_commit_hash,
                                       commit_url)

    # post start message
    post_message_to_github(github_message('has started'), ghprb_pull_id)

    pr_check_results = run_pr_checks(pr_tests, ghprb_actual_commit, sha1)

    test_result_code, test_result_note = run_tests(tests_timeout)

    # post end message
    result_message = github_message('has finished')
    result_message += '\n' + test_result_note + '\n'
    result_message += '\n'.join(pr_check_results)

    post_message_to_github(result_message, ghprb_pull_id)

    sys.exit(test_result_code)


if __name__ == "__main__":
    main()