author     hyukjinkwon <gurwls223@gmail.com>    2017-01-02 15:23:19 +0000
committer  Sean Owen <sowen@cloudera.com>       2017-01-02 15:23:19 +0000
commit     46b212602428f1f11c184c836b4e09c150d0ee30 (patch)
tree       b30420dbdfe979f65c390edbfe2d103572c07501 /dev
parent     f1330b1d9e7b1d5de611e59eecae1bf0b0616d81 (diff)
download   spark-46b212602428f1f11c184c836b4e09c150d0ee30.tar.gz
           spark-46b212602428f1f11c184c836b4e09c150d0ee30.tar.bz2
           spark-46b212602428f1f11c184c836b4e09c150d0ee30.zip
[SPARK-19002][BUILD][PYTHON] Check pep8 against all Python scripts
## What changes were proposed in this pull request?

This PR proposes to check pep8 against all other Python scripts and fix the errors as below:

```bash
./dev/create-release/generate-contributors.py
./dev/create-release/releaseutils.py
./dev/create-release/translate-contributors.py
./dev/lint-python
./python/docs/epytext.py
./examples/src/main/python/mllib/decision_tree_classification_example.py
./examples/src/main/python/mllib/decision_tree_regression_example.py
./examples/src/main/python/mllib/gradient_boosting_classification_example.py
./examples/src/main/python/mllib/gradient_boosting_regression_example.py
./examples/src/main/python/mllib/linear_regression_with_sgd_example.py
./examples/src/main/python/mllib/logistic_regression_with_lbfgs_example.py
./examples/src/main/python/mllib/naive_bayes_example.py
./examples/src/main/python/mllib/random_forest_classification_example.py
./examples/src/main/python/mllib/random_forest_regression_example.py
./examples/src/main/python/mllib/svm_with_sgd_example.py
./examples/src/main/python/streaming/network_wordjoinsentiments.py
./sql/hive/src/test/resources/data/scripts/cat.py
./sql/hive/src/test/resources/data/scripts/cat_error.py
./sql/hive/src/test/resources/data/scripts/doubleescapedtab.py
./sql/hive/src/test/resources/data/scripts/dumpdata_script.py
./sql/hive/src/test/resources/data/scripts/escapedcarriagereturn.py
./sql/hive/src/test/resources/data/scripts/escapednewline.py
./sql/hive/src/test/resources/data/scripts/escapedtab.py
./sql/hive/src/test/resources/data/scripts/input20_script.py
./sql/hive/src/test/resources/data/scripts/newline.py
```

## How was this patch tested?

- `./python/docs/epytext.py`

```bash
cd ./python/docs && make html
```

- pep8 check (Python 2.7 / Python 3.3.6)

```
./dev/lint-python
```

- `./dev/merge_spark_pr.py` (Python 2.7 only / Python 3.3.6 not working)

```bash
python -m doctest -v ./dev/merge_spark_pr.py
```

- `./dev/create-release/releaseutils.py`, `./dev/create-release/generate-contributors.py` and `./dev/create-release/translate-contributors.py` (Python 2.7 only / Python 3.3.6 not working)

```bash
python generate-contributors.py
python translate-contributors.py
```

- Examples (Python 2.7 / Python 3.3.6)

```bash
./bin/spark-submit examples/src/main/python/mllib/decision_tree_classification_example.py
./bin/spark-submit examples/src/main/python/mllib/decision_tree_regression_example.py
./bin/spark-submit examples/src/main/python/mllib/gradient_boosting_classification_example.py
./bin/spark-submit examples/src/main/python/mllib/gradient_boosting_regression_example.p
./bin/spark-submit examples/src/main/python/mllib/random_forest_classification_example.py
./bin/spark-submit examples/src/main/python/mllib/random_forest_regression_example.py
```

- Examples (Python 2.7 only / Python 3.3.6 not working)

```
./bin/spark-submit examples/src/main/python/mllib/linear_regression_with_sgd_example.py
./bin/spark-submit examples/src/main/python/mllib/logistic_regression_with_lbfgs_example.py
./bin/spark-submit examples/src/main/python/mllib/naive_bayes_example.py
./bin/spark-submit examples/src/main/python/mllib/svm_with_sgd_example.py
```

- `sql/hive/src/test/resources/data/scripts/*.py` (Python 2.7 / Python 3.3.6 within suggested changes)

  Manually tested only the changed ones.

- `./dev/github_jira_sync.py` (Python 2.7 only / Python 3.3.6 not working)

  Manually tested this after disabling actually adding comments and links, and also via Jenkins tests.

Author: hyukjinkwon <gurwls223@gmail.com>

Closes #16405 from HyukjinKwon/minor-pep8.
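As a hedged illustration of the style check being extended here (not the exact invocation inside `./dev/lint-python`, which discovers files itself and writes its findings to `dev/pep8-report.txt`), a single script touched by this PR can be checked with the stand-alone `pep8` tool, assuming it is installed and assuming the common 100-character line limit:

```bash
# Rough sketch only: run the pep8 checker against one of the scripts fixed
# by this PR. The real ./dev/lint-python run covers every discovered script
# and records the results in a report file.
pip install pep8
pep8 --max-line-length=100 dev/create-release/generate-contributors.py
```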
Diffstat (limited to 'dev')
-rwxr-xr-x  dev/create-release/generate-contributors.py    145
-rwxr-xr-x  dev/create-release/releaseutils.py              73
-rwxr-xr-x  dev/create-release/translate-contributors.py    78
-rwxr-xr-x  dev/github_jira_sync.py                          86
-rwxr-xr-x  dev/lint-python                                   6
-rwxr-xr-x  dev/merge_spark_pr.py                            85
6 files changed, 282 insertions, 191 deletions
diff --git a/dev/create-release/generate-contributors.py b/dev/create-release/generate-contributors.py
index db9c680a4b..131d81c8a7 100755
--- a/dev/create-release/generate-contributors.py
+++ b/dev/create-release/generate-contributors.py
@@ -33,14 +33,14 @@ PREVIOUS_RELEASE_TAG = os.environ.get("PREVIOUS_RELEASE_TAG", "v1.1.0")
while not tag_exists(RELEASE_TAG):
RELEASE_TAG = raw_input("Please provide a valid release tag: ")
while not tag_exists(PREVIOUS_RELEASE_TAG):
- print "Please specify the previous release tag."
- PREVIOUS_RELEASE_TAG = raw_input(\
- "For instance, if you are releasing v1.2.0, you should specify v1.1.0: ")
+ print("Please specify the previous release tag.")
+ PREVIOUS_RELEASE_TAG = raw_input(
+ "For instance, if you are releasing v1.2.0, you should specify v1.1.0: ")
# Gather commits found in the new tag but not in the old tag.
# This filters commits based on both the git hash and the PR number.
# If either is present in the old tag, then we ignore the commit.
-print "Gathering new commits between tags %s and %s" % (PREVIOUS_RELEASE_TAG, RELEASE_TAG)
+print("Gathering new commits between tags %s and %s" % (PREVIOUS_RELEASE_TAG, RELEASE_TAG))
release_commits = get_commits(RELEASE_TAG)
previous_release_commits = get_commits(PREVIOUS_RELEASE_TAG)
previous_release_hashes = set()
@@ -62,17 +62,20 @@ if not new_commits:
sys.exit("There are no new commits between %s and %s!" % (PREVIOUS_RELEASE_TAG, RELEASE_TAG))
# Prompt the user for confirmation that the commit range is correct
-print "\n=================================================================================="
-print "JIRA server: %s" % JIRA_API_BASE
-print "Release tag: %s" % RELEASE_TAG
-print "Previous release tag: %s" % PREVIOUS_RELEASE_TAG
-print "Number of commits in this range: %s" % len(new_commits)
+print("\n==================================================================================")
+print("JIRA server: %s" % JIRA_API_BASE)
+print("Release tag: %s" % RELEASE_TAG)
+print("Previous release tag: %s" % PREVIOUS_RELEASE_TAG)
+print("Number of commits in this range: %s" % len(new_commits))
print
+
+
def print_indented(_list):
- for x in _list: print " %s" % x
+ for x in _list:
+ print(" %s" % x)
if yesOrNoPrompt("Show all commits?"):
print_indented(new_commits)
-print "==================================================================================\n"
+print("==================================================================================\n")
if not yesOrNoPrompt("Does this look correct?"):
sys.exit("Ok, exiting")
@@ -82,45 +85,76 @@ maintenance = []
reverts = []
nojiras = []
filtered_commits = []
+
+
def is_release(commit_title):
- return re.findall("\[release\]", commit_title.lower()) or\
- "preparing spark release" in commit_title.lower() or\
- "preparing development version" in commit_title.lower() or\
- "CHANGES.txt" in commit_title
+ return re.findall("\[release\]", commit_title.lower()) or \
+ "preparing spark release" in commit_title.lower() or \
+ "preparing development version" in commit_title.lower() or \
+ "CHANGES.txt" in commit_title
+
+
def is_maintenance(commit_title):
- return "maintenance" in commit_title.lower() or\
- "manually close" in commit_title.lower()
+ return "maintenance" in commit_title.lower() or \
+ "manually close" in commit_title.lower()
+
+
def has_no_jira(commit_title):
return not re.findall("SPARK-[0-9]+", commit_title.upper())
+
+
def is_revert(commit_title):
return "revert" in commit_title.lower()
+
+
def is_docs(commit_title):
- return re.findall("docs*", commit_title.lower()) or\
- "programming guide" in commit_title.lower()
+ return re.findall("docs*", commit_title.lower()) or \
+ "programming guide" in commit_title.lower()
+
+
for c in new_commits:
t = c.get_title()
- if not t: continue
- elif is_release(t): releases.append(c)
- elif is_maintenance(t): maintenance.append(c)
- elif is_revert(t): reverts.append(c)
- elif is_docs(t): filtered_commits.append(c) # docs may not have JIRA numbers
- elif has_no_jira(t): nojiras.append(c)
- else: filtered_commits.append(c)
+ if not t:
+ continue
+ elif is_release(t):
+ releases.append(c)
+ elif is_maintenance(t):
+ maintenance.append(c)
+ elif is_revert(t):
+ reverts.append(c)
+ elif is_docs(t):
+ filtered_commits.append(c) # docs may not have JIRA numbers
+ elif has_no_jira(t):
+ nojiras.append(c)
+ else:
+ filtered_commits.append(c)
# Warn against ignored commits
if releases or maintenance or reverts or nojiras:
- print "\n=================================================================================="
- if releases: print "Found %d release commits" % len(releases)
- if maintenance: print "Found %d maintenance commits" % len(maintenance)
- if reverts: print "Found %d revert commits" % len(reverts)
- if nojiras: print "Found %d commits with no JIRA" % len(nojiras)
- print "* Warning: these commits will be ignored.\n"
+ print("\n==================================================================================")
+ if releases:
+ print("Found %d release commits" % len(releases))
+ if maintenance:
+ print("Found %d maintenance commits" % len(maintenance))
+ if reverts:
+ print("Found %d revert commits" % len(reverts))
+ if nojiras:
+ print("Found %d commits with no JIRA" % len(nojiras))
+ print("* Warning: these commits will be ignored.\n")
if yesOrNoPrompt("Show ignored commits?"):
- if releases: print "Release (%d)" % len(releases); print_indented(releases)
- if maintenance: print "Maintenance (%d)" % len(maintenance); print_indented(maintenance)
- if reverts: print "Revert (%d)" % len(reverts); print_indented(reverts)
- if nojiras: print "No JIRA (%d)" % len(nojiras); print_indented(nojiras)
- print "==================== Warning: the above commits will be ignored ==================\n"
+ if releases:
+ print("Release (%d)" % len(releases))
+ print_indented(releases)
+ if maintenance:
+ print("Maintenance (%d)" % len(maintenance))
+ print_indented(maintenance)
+ if reverts:
+ print("Revert (%d)" % len(reverts))
+ print_indented(reverts)
+ if nojiras:
+ print("No JIRA (%d)" % len(nojiras))
+ print_indented(nojiras)
+ print("==================== Warning: the above commits will be ignored ==================\n")
prompt_msg = "%d commits left to process after filtering. Ok to proceed?" % len(filtered_commits)
if not yesOrNoPrompt(prompt_msg):
sys.exit("Ok, exiting.")
@@ -147,9 +181,9 @@ invalid_authors = {}
# }
#
author_info = {}
-jira_options = { "server": JIRA_API_BASE }
-jira_client = JIRA(options = jira_options)
-print "\n=========================== Compiling contributor list ==========================="
+jira_options = {"server": JIRA_API_BASE}
+jira_client = JIRA(options=jira_options)
+print("\n=========================== Compiling contributor list ===========================")
for commit in filtered_commits:
_hash = commit.get_hash()
title = commit.get_title()
@@ -168,8 +202,9 @@ for commit in filtered_commits:
# Parse components from the commit title, if any
commit_components = find_components(title, _hash)
# Populate or merge an issue into author_info[author]
+
def populate(issue_type, components):
- components = components or [CORE_COMPONENT] # assume core if no components provided
+ components = components or [CORE_COMPONENT] # assume core if no components provided
if author not in author_info:
author_info[author] = {}
if issue_type not in author_info[author]:
@@ -182,17 +217,17 @@ for commit in filtered_commits:
jira_issue = jira_client.issue(issue)
jira_type = jira_issue.fields.issuetype.name
jira_type = translate_issue_type(jira_type, issue, warnings)
- jira_components = [translate_component(c.name, _hash, warnings)\
- for c in jira_issue.fields.components]
+ jira_components = [translate_component(c.name, _hash, warnings)
+ for c in jira_issue.fields.components]
all_components = set(jira_components + commit_components)
populate(jira_type, all_components)
except Exception as e:
- print "Unexpected error:", e
+ print("Unexpected error:", e)
# For docs without an associated JIRA, manually add it ourselves
if is_docs(title) and not issues:
populate("documentation", commit_components)
- print " Processed commit %s authored by %s on %s" % (_hash, author, date)
-print "==================================================================================\n"
+ print(" Processed commit %s authored by %s on %s" % (_hash, author, date))
+print("==================================================================================\n")
# Write to contributors file ordered by author names
# Each line takes the format " * Author name -- semi-colon delimited contributions"
@@ -215,8 +250,8 @@ for author in authors:
# Otherwise, group contributions by issue types instead of modules
# e.g. Bug fixes in MLlib, Core, and Streaming; documentation in YARN
else:
- contributions = ["%s in %s" % (issue_type, nice_join(comps)) \
- for issue_type, comps in author_info[author].items()]
+ contributions = ["%s in %s" % (issue_type, nice_join(comps))
+ for issue_type, comps in author_info[author].items()]
contribution = "; ".join(contributions)
# Do not use python's capitalize() on the whole string to preserve case
assert contribution
@@ -226,11 +261,11 @@ for author in authors:
# E.g. andrewor14/SPARK-3425/SPARK-1157/SPARK-6672
if author in invalid_authors and invalid_authors[author]:
author = author + "/" + "/".join(invalid_authors[author])
- #line = " * %s -- %s" % (author, contribution)
+ # line = " * %s -- %s" % (author, contribution)
line = author
contributors_file.write(line + "\n")
contributors_file.close()
-print "Contributors list is successfully written to %s!" % contributors_file_name
+print("Contributors list is successfully written to %s!" % contributors_file_name)
# Prompt the user to translate author names if necessary
if invalid_authors:
@@ -241,8 +276,8 @@ if invalid_authors:
# Log any warnings encountered in the process
if warnings:
- print "\n============ Warnings encountered while creating the contributor list ============"
- for w in warnings: print w
- print "Please correct these in the final contributors list at %s." % contributors_file_name
- print "==================================================================================\n"
-
+ print("\n============ Warnings encountered while creating the contributor list ============")
+ for w in warnings:
+ print(w)
+ print("Please correct these in the final contributors list at %s." % contributors_file_name)
+ print("==================================================================================\n")
diff --git a/dev/create-release/releaseutils.py b/dev/create-release/releaseutils.py
index 5d0ac16b3b..730138195e 100755
--- a/dev/create-release/releaseutils.py
+++ b/dev/create-release/releaseutils.py
@@ -30,28 +30,29 @@ try:
except ImportError:
from jira.utils import JIRAError
except ImportError:
- print "This tool requires the jira-python library"
- print "Install using 'sudo pip install jira'"
+ print("This tool requires the jira-python library")
+ print("Install using 'sudo pip install jira'")
sys.exit(-1)
try:
from github import Github
from github import GithubException
except ImportError:
- print "This tool requires the PyGithub library"
- print "Install using 'sudo pip install PyGithub'"
+ print("This tool requires the PyGithub library")
+ print("Install using 'sudo pip install PyGithub'")
sys.exit(-1)
try:
import unidecode
except ImportError:
- print "This tool requires the unidecode library to decode obscure github usernames"
- print "Install using 'sudo pip install unidecode'"
+ print("This tool requires the unidecode library to decode obscure github usernames")
+ print("Install using 'sudo pip install unidecode'")
sys.exit(-1)
# Contributors list file name
contributors_file_name = "contributors.txt"
+
# Prompt the user to answer yes or no until they do so
def yesOrNoPrompt(msg):
response = raw_input("%s [y/n]: " % msg)
@@ -59,30 +60,50 @@ def yesOrNoPrompt(msg):
return yesOrNoPrompt(msg)
return response == "y"
+
# Utility functions run git commands (written with Git 1.8.5)
-def run_cmd(cmd): return Popen(cmd, stdout=PIPE).communicate()[0]
-def run_cmd_error(cmd): return Popen(cmd, stdout=PIPE, stderr=PIPE).communicate()[1]
+def run_cmd(cmd):
+ return Popen(cmd, stdout=PIPE).communicate()[0]
+
+
+def run_cmd_error(cmd):
+ return Popen(cmd, stdout=PIPE, stderr=PIPE).communicate()[1]
+
+
def get_date(commit_hash):
return run_cmd(["git", "show", "--quiet", "--pretty=format:%cd", commit_hash])
+
+
def tag_exists(tag):
stderr = run_cmd_error(["git", "show", tag])
return "error" not in stderr
+
# A type-safe representation of a commit
class Commit:
- def __init__(self, _hash, author, title, pr_number = None):
+ def __init__(self, _hash, author, title, pr_number=None):
self._hash = _hash
self.author = author
self.title = title
self.pr_number = pr_number
- def get_hash(self): return self._hash
- def get_author(self): return self.author
- def get_title(self): return self.title
- def get_pr_number(self): return self.pr_number
+
+ def get_hash(self):
+ return self._hash
+
+ def get_author(self):
+ return self.author
+
+ def get_title(self):
+ return self.title
+
+ def get_pr_number(self):
+ return self.pr_number
+
def __str__(self):
closes_pr = "(Closes #%s)" % self.pr_number if self.pr_number else ""
return "%s %s %s %s" % (self._hash, self.author, self.title, closes_pr)
+
# Return all commits that belong to the specified tag.
#
# Under the hood, this runs a `git log` on that tag and parses the fields
@@ -106,8 +127,9 @@ def get_commits(tag):
raw_commits = [c for c in output.split(commit_start_marker) if c]
for commit in raw_commits:
if commit.count(commit_end_marker) != 1:
- print "Commit end marker not found in commit: "
- for line in commit.split("\n"): print line
+ print("Commit end marker not found in commit: ")
+ for line in commit.split("\n"):
+ print(line)
sys.exit(1)
# Separate commit digest from the body
# From the digest we extract the hash, author and the title
@@ -178,6 +200,7 @@ known_components = {
"yarn": "YARN"
}
+
# Translate issue types using a format appropriate for writing contributions
# If an unknown issue type is encountered, warn the user
def translate_issue_type(issue_type, issue_id, warnings):
@@ -188,6 +211,7 @@ def translate_issue_type(issue_type, issue_id, warnings):
warnings.append("Unknown issue type \"%s\" (see %s)" % (issue_type, issue_id))
return issue_type
+
# Translate component names using a format appropriate for writing contributions
# If an unknown component is encountered, warn the user
def translate_component(component, commit_hash, warnings):
@@ -198,20 +222,22 @@ def translate_component(component, commit_hash, warnings):
warnings.append("Unknown component \"%s\" (see %s)" % (component, commit_hash))
return component
+
# Parse components in the commit message
# The returned components are already filtered and translated
def find_components(commit, commit_hash):
components = re.findall("\[\w*\]", commit.lower())
- components = [translate_component(c, commit_hash)\
- for c in components if c in known_components]
+ components = [translate_component(c, commit_hash)
+ for c in components if c in known_components]
return components
+
# Join a list of strings in a human-readable manner
# e.g. ["Juice"] -> "Juice"
# e.g. ["Juice", "baby"] -> "Juice and baby"
# e.g. ["Juice", "baby", "moon"] -> "Juice, baby, and moon"
def nice_join(str_list):
- str_list = list(str_list) # sometimes it's a set
+ str_list = list(str_list) # sometimes it's a set
if not str_list:
return ""
elif len(str_list) == 1:
@@ -221,6 +247,7 @@ def nice_join(str_list):
else:
return ", ".join(str_list[:-1]) + ", and " + str_list[-1]
+
# Return the full name of the specified user on Github
# If the user doesn't exist, return None
def get_github_name(author, github_client):
@@ -233,6 +260,7 @@ def get_github_name(author, github_client):
raise e
return None
+
# Return the full name of the specified user on JIRA
# If the user doesn't exist, return None
def get_jira_name(author, jira_client):
@@ -245,15 +273,18 @@ def get_jira_name(author, jira_client):
raise e
return None
+
# Return whether the given name is in the form <First Name><space><Last Name>
def is_valid_author(author):
- if not author: return False
+ if not author:
+ return False
return " " in author and not re.findall("[0-9]", author)
+
# Capitalize the first letter of each word in the given author name
def capitalize_author(author):
- if not author: return None
+ if not author:
+ return None
words = author.split(" ")
words = [w[0].capitalize() + w[1:] for w in words if w]
return " ".join(words)
-
diff --git a/dev/create-release/translate-contributors.py b/dev/create-release/translate-contributors.py
index 2cc64e4444..be30e6ad30 100755
--- a/dev/create-release/translate-contributors.py
+++ b/dev/create-release/translate-contributors.py
@@ -45,8 +45,8 @@ if not GITHUB_API_TOKEN:
# Write new contributors list to <old_file_name>.final
if not os.path.isfile(contributors_file_name):
- print "Contributors file %s does not exist!" % contributors_file_name
- print "Have you run ./generate-contributors.py yet?"
+ print("Contributors file %s does not exist!" % contributors_file_name)
+ print("Have you run ./generate-contributors.py yet?")
sys.exit(1)
contributors_file = open(contributors_file_name, "r")
warnings = []
@@ -58,11 +58,11 @@ if len(sys.argv) > 1:
if "--non-interactive" in options:
INTERACTIVE_MODE = False
if INTERACTIVE_MODE:
- print "Running in interactive mode. To disable this, provide the --non-interactive flag."
+ print("Running in interactive mode. To disable this, provide the --non-interactive flag.")
# Setup Github and JIRA clients
-jira_options = { "server": JIRA_API_BASE }
-jira_client = JIRA(options = jira_options, basic_auth = (JIRA_USERNAME, JIRA_PASSWORD))
+jira_options = {"server": JIRA_API_BASE}
+jira_client = JIRA(options=jira_options, basic_auth=(JIRA_USERNAME, JIRA_PASSWORD))
github_client = Github(GITHUB_API_TOKEN)
# Load known author translations that are cached locally
@@ -70,7 +70,8 @@ known_translations = {}
known_translations_file_name = "known_translations"
known_translations_file = open(known_translations_file_name, "r")
for line in known_translations_file:
- if line.startswith("#"): continue
+ if line.startswith("#"):
+ continue
[old_name, new_name] = line.strip("\n").split(" - ")
known_translations[old_name] = new_name
known_translations_file.close()
@@ -91,6 +92,8 @@ known_translations_file = open(known_translations_file_name, "a")
# (NOT_FOUND, "No assignee found for SPARK-1763")
# ]
NOT_FOUND = "Not found"
+
+
def generate_candidates(author, issues):
candidates = []
# First check for full name of Github user
@@ -121,9 +124,11 @@ def generate_candidates(author, issues):
user_name = jira_assignee.name
display_name = jira_assignee.displayName
if display_name:
- candidates.append((display_name, "Full name of %s assignee %s" % (issue, user_name)))
+ candidates.append(
+ (display_name, "Full name of %s assignee %s" % (issue, user_name)))
else:
- candidates.append((NOT_FOUND, "No full name found for %s assignee %" % (issue, user_name)))
+ candidates.append(
+ (NOT_FOUND, "No full name found for %s assignee %s" % (issue, user_name)))
else:
candidates.append((NOT_FOUND, "No assignee found for %s" % issue))
# Guard against special characters in candidate names
@@ -143,18 +148,18 @@ def generate_candidates(author, issues):
# select from this list. Additionally, the user may also choose to enter a custom name.
# In non-interactive mode, this script picks the first valid author name from the candidates
# If no such name exists, the original name is used (without the JIRA numbers).
-print "\n========================== Translating contributor list =========================="
+print("\n========================== Translating contributor list ==========================")
lines = contributors_file.readlines()
contributions = []
for i, line in enumerate(lines):
# It is possible that a line in the contributor file only has the github name, e.g. yhuai.
# So, we need a strip() to remove the newline.
temp_author = line.strip(" * ").split(" -- ")[0].strip()
- print "Processing author %s (%d/%d)" % (temp_author, i + 1, len(lines))
+ print("Processing author %s (%d/%d)" % (temp_author, i + 1, len(lines)))
if not temp_author:
error_msg = " ERROR: Expected the following format \" * <author> -- <contributions>\"\n"
error_msg += " ERROR: Actual = %s" % line
- print error_msg
+ print(error_msg)
warnings.append(error_msg)
contributions.append(line)
continue
@@ -175,8 +180,8 @@ for i, line in enumerate(lines):
# [3] andrewor14 - Raw Github username
# [4] Custom
candidate_names = []
- bad_prompts = [] # Prompts that can't actually be selected; print these first.
- good_prompts = [] # Prompts that contain valid choices
+ bad_prompts = [] # Prompts that can't actually be selected; print these first.
+ good_prompts = [] # Prompts that contain valid choices
for candidate, source in candidates:
if candidate == NOT_FOUND:
bad_prompts.append(" [X] %s" % source)
@@ -186,13 +191,16 @@ for i, line in enumerate(lines):
good_prompts.append(" [%d] %s - %s" % (index, candidate, source))
raw_index = len(candidate_names)
custom_index = len(candidate_names) + 1
- for p in bad_prompts: print p
- if bad_prompts: print " ---"
- for p in good_prompts: print p
+ for p in bad_prompts:
+ print(p)
+ if bad_prompts:
+ print(" ---")
+ for p in good_prompts:
+ print(p)
# In interactive mode, additionally provide "custom" option and await user response
if INTERACTIVE_MODE:
- print " [%d] %s - Raw Github username" % (raw_index, author)
- print " [%d] Custom" % custom_index
+ print(" [%d] %s - Raw Github username" % (raw_index, author))
+ print(" [%d] Custom" % custom_index)
response = raw_input(" Your choice: ")
last_index = custom_index
while not response.isdigit() or int(response) > last_index:
@@ -204,8 +212,8 @@ for i, line in enumerate(lines):
new_author = candidate_names[response]
# In non-interactive mode, just pick the first candidate
else:
- valid_candidate_names = [name for name, _ in candidates\
- if is_valid_author(name) and name != NOT_FOUND]
+ valid_candidate_names = [name for name, _ in candidates
+ if is_valid_author(name) and name != NOT_FOUND]
if valid_candidate_names:
new_author = valid_candidate_names[0]
# Finally, capitalize the author and replace the original one with it
@@ -213,17 +221,20 @@ for i, line in enumerate(lines):
if is_valid_author(new_author):
new_author = capitalize_author(new_author)
else:
- warnings.append("Unable to find a valid name %s for author %s" % (author, temp_author))
- print " * Replacing %s with %s" % (author, new_author)
- # If we are in interactive mode, prompt the user whether we want to remember this new mapping
- if INTERACTIVE_MODE and\
- author not in known_translations and\
- yesOrNoPrompt(" Add mapping %s -> %s to known translations file?" % (author, new_author)):
+ warnings.append(
+ "Unable to find a valid name %s for author %s" % (author, temp_author))
+ print(" * Replacing %s with %s" % (author, new_author))
+ # If we are in interactive mode, prompt the user whether we want to remember this new
+ # mapping
+ if INTERACTIVE_MODE and \
+ author not in known_translations and \
+ yesOrNoPrompt(
+ " Add mapping %s -> %s to known translations file?" % (author, new_author)):
known_translations_file.write("%s - %s\n" % (author, new_author))
known_translations_file.flush()
line = line.replace(temp_author, author)
contributions.append(line)
-print "==================================================================================\n"
+print("==================================================================================\n")
contributors_file.close()
known_translations_file.close()
@@ -244,12 +255,13 @@ for line in contributions:
new_contributors_file.write(line)
new_contributors_file.close()
-print "Translated contributors list successfully written to %s!" % new_contributors_file_name
+print("Translated contributors list successfully written to %s!" % new_contributors_file_name)
# Log any warnings encountered in the process
if warnings:
- print "\n========== Warnings encountered while translating the contributor list ==========="
- for w in warnings: print w
- print "Please manually correct these in the final contributors list at %s." % new_contributors_file_name
- print "==================================================================================\n"
-
+ print("\n========== Warnings encountered while translating the contributor list ===========")
+ for w in warnings:
+ print(w)
+ print("Please manually correct these in the final contributors list at %s." %
+ new_contributors_file_name)
+ print("==================================================================================\n")
diff --git a/dev/github_jira_sync.py b/dev/github_jira_sync.py
index 287f0ca24a..acc9aeabbb 100755
--- a/dev/github_jira_sync.py
+++ b/dev/github_jira_sync.py
@@ -27,8 +27,8 @@ import urllib2
try:
import jira.client
except ImportError:
- print "This tool requires the jira-python library"
- print "Install using 'sudo pip install jira'"
+ print("This tool requires the jira-python library")
+ print("Install using 'sudo pip install jira'")
sys.exit(-1)
# User facing configs
@@ -48,16 +48,19 @@ MIN_COMMENT_PR = int(os.environ.get("MIN_COMMENT_PR", "1496"))
# the state of JIRA's that are tied to PR's we've already looked at.
MAX_FILE = ".github-jira-max"
+
def get_url(url):
try:
return urllib2.urlopen(url)
- except urllib2.HTTPError as e:
- print "Unable to fetch URL, exiting: %s" % url
+ except urllib2.HTTPError:
+ print("Unable to fetch URL, exiting: %s" % url)
sys.exit(-1)
+
def get_json(urllib_response):
return json.load(urllib_response)
+
# Return a list of (JIRA id, JSON dict) tuples:
# e.g. [('SPARK-1234', {.. json ..}), ('SPARK-5687', {.. json ..})}
def get_jira_prs():
@@ -65,83 +68,86 @@ def get_jira_prs():
has_next_page = True
page_num = 0
while has_next_page:
- page = get_url(GITHUB_API_BASE + "/pulls?page=%s&per_page=100" % page_num)
- page_json = get_json(page)
-
- for pull in page_json:
- jiras = re.findall(JIRA_PROJECT_NAME + "-[0-9]{4,5}", pull['title'])
- for jira in jiras:
- result = result + [(jira, pull)]
-
- # Check if there is another page
- link_header = filter(lambda k: k.startswith("Link"), page.info().headers)[0]
- if not "next"in link_header:
- has_next_page = False
- else:
- page_num = page_num + 1
+ page = get_url(GITHUB_API_BASE + "/pulls?page=%s&per_page=100" % page_num)
+ page_json = get_json(page)
+
+ for pull in page_json:
+ jiras = re.findall(JIRA_PROJECT_NAME + "-[0-9]{4,5}", pull['title'])
+ for jira in jiras:
+ result = result + [(jira, pull)]
+
+ # Check if there is another page
+ link_header = filter(lambda k: k.startswith("Link"), page.info().headers)[0]
+ if "next" not in link_header:
+ has_next_page = False
+ else:
+ page_num += 1
return result
+
def set_max_pr(max_val):
f = open(MAX_FILE, 'w')
f.write("%s" % max_val)
f.close()
- print "Writing largest PR number seen: %s" % max_val
+ print("Writing largest PR number seen: %s" % max_val)
+
def get_max_pr():
if os.path.exists(MAX_FILE):
result = int(open(MAX_FILE, 'r').read())
- print "Read largest PR number previously seen: %s" % result
+ print("Read largest PR number previously seen: %s" % result)
return result
else:
return 0
+
jira_client = jira.client.JIRA({'server': JIRA_API_BASE},
- basic_auth=(JIRA_USERNAME, JIRA_PASSWORD))
+ basic_auth=(JIRA_USERNAME, JIRA_PASSWORD))
jira_prs = get_jira_prs()
previous_max = get_max_pr()
-print "Retrieved %s JIRA PR's from Github" % len(jira_prs)
+print("Retrieved %s JIRA PR's from Github" % len(jira_prs))
jira_prs = [(k, v) for k, v in jira_prs if int(v['number']) > previous_max]
-print "%s PR's remain after excluding visted ones" % len(jira_prs)
+print("%s PR's remain after excluding visted ones" % len(jira_prs))
num_updates = 0
considered = []
-for issue, pr in sorted(jira_prs, key=lambda (k, v): int(v['number'])):
+for issue, pr in sorted(jira_prs, key=lambda kv: int(kv[1]['number'])):
if num_updates >= MAX_UPDATES:
- break
+ break
pr_num = int(pr['number'])
- print "Checking issue %s" % issue
+ print("Checking issue %s" % issue)
considered = considered + [pr_num]
url = pr['html_url']
- title = "[Github] Pull Request #%s (%s)" % (pr['number'], pr['user']['login'])
+ title = "[Github] Pull Request #%s (%s)" % (pr['number'], pr['user']['login'])
try:
- existing_links = map(lambda l: l.raw['object']['url'], jira_client.remote_links(issue))
+ existing_links = map(lambda l: l.raw['object']['url'], jira_client.remote_links(issue))
except:
- print "Failure reading JIRA %s (does it exist?)" % issue
- print sys.exc_info()[0]
- continue
+ print("Failure reading JIRA %s (does it exist?)" % issue)
+ print(sys.exc_info()[0])
+ continue
if url in existing_links:
continue
- icon = {"title": "Pull request #%s" % pr['number'],
- "url16x16": "https://assets-cdn.github.com/favicon.ico"}
+ icon = {"title": "Pull request #%s" % pr['number'],
+ "url16x16": "https://assets-cdn.github.com/favicon.ico"}
destination = {"title": title, "url": url, "icon": icon}
# For all possible fields see:
- # https://developer.atlassian.com/display/JIRADEV/Fields+in+Remote+Issue+Links
- # application = {"name": "Github pull requests", "type": "org.apache.spark.jira.github"}
+ # https://developer.atlassian.com/display/JIRADEV/Fields+in+Remote+Issue+Links
+ # application = {"name": "Github pull requests", "type": "org.apache.spark.jira.github"}
jira_client.add_remote_link(issue, destination)
-
+
comment = "User '%s' has created a pull request for this issue:" % pr['user']['login']
- comment = comment + ("\n%s" % pr['html_url'])
+ comment += "\n%s" % pr['html_url']
if pr_num >= MIN_COMMENT_PR:
jira_client.add_comment(issue, comment)
-
- print "Added link %s <-> PR #%s" % (issue, pr['number'])
- num_updates = num_updates + 1
+
+ print("Added link %s <-> PR #%s" % (issue, pr['number']))
+ num_updates += 1
if len(considered) > 0:
set_max_pr(max(considered))
diff --git a/dev/lint-python b/dev/lint-python
index 3f878c2dad..c6f3fbfab8 100755
--- a/dev/lint-python
+++ b/dev/lint-python
@@ -19,10 +19,8 @@
SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )"
SPARK_ROOT_DIR="$(dirname "$SCRIPT_DIR")"
-PATHS_TO_CHECK="./python/pyspark/ ./examples/src/main/python/ ./dev/sparktestsupport"
-# TODO: fix pep8 errors with the rest of the Python scripts under dev
-PATHS_TO_CHECK="$PATHS_TO_CHECK ./dev/run-tests.py ./python/*.py ./dev/run-tests-jenkins.py"
-PATHS_TO_CHECK="$PATHS_TO_CHECK ./dev/pip-sanity-check.py"
+# Exclude auto-geneated configuration file.
+PATHS_TO_CHECK="$( cd "$SPARK_ROOT_DIR" && find . -name "*.py" -not -path "*python/docs/conf.py" )"
PEP8_REPORT_PATH="$SPARK_ROOT_DIR/dev/pep8-report.txt"
PYLINT_REPORT_PATH="$SPARK_ROOT_DIR/dev/pylint-report.txt"
PYLINT_INSTALL_INFO="$SPARK_ROOT_DIR/dev/pylint-info.txt"
diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py
index 5ab285eae9..4bacb38518 100755
--- a/dev/merge_spark_pr.py
+++ b/dev/merge_spark_pr.py
@@ -70,22 +70,22 @@ def get_json(url):
return json.load(urllib2.urlopen(request))
except urllib2.HTTPError as e:
if "X-RateLimit-Remaining" in e.headers and e.headers["X-RateLimit-Remaining"] == '0':
- print "Exceeded the GitHub API rate limit; see the instructions in " + \
- "dev/merge_spark_pr.py to configure an OAuth token for making authenticated " + \
- "GitHub requests."
+ print("Exceeded the GitHub API rate limit; see the instructions in " +
+ "dev/merge_spark_pr.py to configure an OAuth token for making authenticated " +
+ "GitHub requests.")
else:
- print "Unable to fetch URL, exiting: %s" % url
+ print("Unable to fetch URL, exiting: %s" % url)
sys.exit(-1)
def fail(msg):
- print msg
+ print(msg)
clean_up()
sys.exit(-1)
def run_cmd(cmd):
- print cmd
+ print(cmd)
if isinstance(cmd, list):
return subprocess.check_output(cmd)
else:
@@ -97,14 +97,15 @@ def continue_maybe(prompt):
if result.lower() != "y":
fail("Okay, exiting")
+
def clean_up():
- print "Restoring head pointer to %s" % original_head
+ print("Restoring head pointer to %s" % original_head)
run_cmd("git checkout %s" % original_head)
branches = run_cmd("git branch").replace(" ", "").split("\n")
for branch in filter(lambda x: x.startswith(BRANCH_PREFIX), branches):
- print "Deleting local branch %s" % branch
+ print("Deleting local branch %s" % branch)
run_cmd("git branch -D %s" % branch)
@@ -246,9 +247,9 @@ def resolve_jira_issue(merge_branches, comment, default_jira_id=""):
if cur_status == "Resolved" or cur_status == "Closed":
fail("JIRA issue %s already has status '%s'" % (jira_id, cur_status))
- print ("=== JIRA %s ===" % jira_id)
- print ("summary\t\t%s\nassignee\t%s\nstatus\t\t%s\nurl\t\t%s/%s\n" % (
- cur_summary, cur_assignee, cur_status, JIRA_BASE, jira_id))
+ print("=== JIRA %s ===" % jira_id)
+ print("summary\t\t%s\nassignee\t%s\nstatus\t\t%s\nurl\t\t%s/%s\n" %
+ (cur_summary, cur_assignee, cur_status, JIRA_BASE, jira_id))
versions = asf_jira.project_versions("SPARK")
versions = sorted(versions, key=lambda x: x.name, reverse=True)
@@ -282,10 +283,10 @@ def resolve_jira_issue(merge_branches, comment, default_jira_id=""):
resolve = filter(lambda a: a['name'] == "Resolve Issue", asf_jira.transitions(jira_id))[0]
resolution = filter(lambda r: r.raw['name'] == "Fixed", asf_jira.resolutions())[0]
asf_jira.transition_issue(
- jira_id, resolve["id"], fixVersions = jira_fix_versions,
- comment = comment, resolution = {'id': resolution.raw['id']})
+ jira_id, resolve["id"], fixVersions=jira_fix_versions,
+ comment=comment, resolution={'id': resolution.raw['id']})
- print "Successfully resolved %s with fixVersions=%s!" % (jira_id, fix_versions)
+ print("Successfully resolved %s with fixVersions=%s!" % (jira_id, fix_versions))
def resolve_jira_issues(title, merge_branches, comment):
@@ -300,23 +301,29 @@ def resolve_jira_issues(title, merge_branches, comment):
def standardize_jira_ref(text):
"""
Standardize the [SPARK-XXXXX] [MODULE] prefix
- Converts "[SPARK-XXX][mllib] Issue", "[MLLib] SPARK-XXX. Issue" or "SPARK XXX [MLLIB]: Issue" to "[SPARK-XXX][MLLIB] Issue"
+ Converts "[SPARK-XXX][mllib] Issue", "[MLLib] SPARK-XXX. Issue" or "SPARK XXX [MLLIB]: Issue" to
+ "[SPARK-XXX][MLLIB] Issue"
- >>> standardize_jira_ref("[SPARK-5821] [SQL] ParquetRelation2 CTAS should check if delete is successful")
+ >>> standardize_jira_ref(
+ ... "[SPARK-5821] [SQL] ParquetRelation2 CTAS should check if delete is successful")
'[SPARK-5821][SQL] ParquetRelation2 CTAS should check if delete is successful'
- >>> standardize_jira_ref("[SPARK-4123][Project Infra][WIP]: Show new dependencies added in pull requests")
+ >>> standardize_jira_ref(
+ ... "[SPARK-4123][Project Infra][WIP]: Show new dependencies added in pull requests")
'[SPARK-4123][PROJECT INFRA][WIP] Show new dependencies added in pull requests'
>>> standardize_jira_ref("[MLlib] Spark 5954: Top by key")
'[SPARK-5954][MLLIB] Top by key'
>>> standardize_jira_ref("[SPARK-979] a LRU scheduler for load balancing in TaskSchedulerImpl")
'[SPARK-979] a LRU scheduler for load balancing in TaskSchedulerImpl'
- >>> standardize_jira_ref("SPARK-1094 Support MiMa for reporting binary compatibility accross versions.")
+ >>> standardize_jira_ref(
+ ... "SPARK-1094 Support MiMa for reporting binary compatibility accross versions.")
'[SPARK-1094] Support MiMa for reporting binary compatibility accross versions.'
>>> standardize_jira_ref("[WIP] [SPARK-1146] Vagrant support for Spark")
'[SPARK-1146][WIP] Vagrant support for Spark'
- >>> standardize_jira_ref("SPARK-1032. If Yarn app fails before registering, app master stays aroun...")
+ >>> standardize_jira_ref(
+ ... "SPARK-1032. If Yarn app fails before registering, app master stays aroun...")
'[SPARK-1032] If Yarn app fails before registering, app master stays aroun...'
- >>> standardize_jira_ref("[SPARK-6250][SPARK-6146][SPARK-5911][SQL] Types are now reserved words in DDL parser.")
+ >>> standardize_jira_ref(
+ ... "[SPARK-6250][SPARK-6146][SPARK-5911][SQL] Types are now reserved words in DDL parser.")
'[SPARK-6250][SPARK-6146][SPARK-5911][SQL] Types are now reserved words in DDL parser.'
>>> standardize_jira_ref("Additional information for users building from source code")
'Additional information for users building from source code'
@@ -350,7 +357,8 @@ def standardize_jira_ref(text):
# Assemble full text (JIRA ref(s), module(s), remaining text)
clean_text = ''.join(jira_refs).strip() + ''.join(components).strip() + " " + text.strip()
- # Replace multiple spaces with a single space, e.g. if no jira refs and/or components were included
+ # Replace multiple spaces with a single space, e.g. if no jira refs and/or components were
+ # included
clean_text = re.sub(r'\s+', ' ', clean_text.strip())
return clean_text
@@ -385,17 +393,17 @@ def main():
# Decide whether to use the modified title or not
modified_title = standardize_jira_ref(pr["title"])
if modified_title != pr["title"]:
- print "I've re-written the title as follows to match the standard format:"
- print "Original: %s" % pr["title"]
- print "Modified: %s" % modified_title
+ print("I've re-written the title as follows to match the standard format:")
+ print("Original: %s" % pr["title"])
+ print("Modified: %s" % modified_title)
result = raw_input("Would you like to use the modified title? (y/n): ")
if result.lower() == "y":
title = modified_title
- print "Using modified title:"
+ print("Using modified title:")
else:
title = pr["title"]
- print "Using original title:"
- print title
+ print("Using original title:")
+ print(title)
else:
title = pr["title"]
@@ -414,13 +422,13 @@ def main():
merge_hash = merge_commits[0]["commit_id"]
message = get_json("%s/commits/%s" % (GITHUB_API_BASE, merge_hash))["commit"]["message"]
- print "Pull request %s has already been merged, assuming you want to backport" % pr_num
+ print("Pull request %s has already been merged, assuming you want to backport" % pr_num)
commit_is_downloaded = run_cmd(['git', 'rev-parse', '--quiet', '--verify',
- "%s^{commit}" % merge_hash]).strip() != ""
+ "%s^{commit}" % merge_hash]).strip() != ""
if not commit_is_downloaded:
fail("Couldn't find any merge commit for #%s, you may need to update HEAD." % pr_num)
- print "Found commit %s:\n%s" % (merge_hash, message)
+ print("Found commit %s:\n%s" % (merge_hash, message))
cherry_pick(pr_num, merge_hash, latest_branch)
sys.exit(0)
@@ -429,9 +437,9 @@ def main():
"Continue? (experts only!)"
continue_maybe(msg)
- print ("\n=== Pull Request #%s ===" % pr_num)
- print ("title\t%s\nsource\t%s\ntarget\t%s\nurl\t%s" % (
- title, pr_repo_desc, target_ref, url))
+ print("\n=== Pull Request #%s ===" % pr_num)
+ print("title\t%s\nsource\t%s\ntarget\t%s\nurl\t%s" %
+ (title, pr_repo_desc, target_ref, url))
continue_maybe("Proceed with merging pull request #%s?" % pr_num)
merged_refs = [target_ref]
@@ -445,14 +453,15 @@ def main():
if JIRA_IMPORTED:
if JIRA_USERNAME and JIRA_PASSWORD:
continue_maybe("Would you like to update an associated JIRA?")
- jira_comment = "Issue resolved by pull request %s\n[%s/%s]" % (pr_num, GITHUB_BASE, pr_num)
+ jira_comment = "Issue resolved by pull request %s\n[%s/%s]" % \
+ (pr_num, GITHUB_BASE, pr_num)
resolve_jira_issues(title, merged_refs, jira_comment)
else:
- print "JIRA_USERNAME and JIRA_PASSWORD not set"
- print "Exiting without trying to close the associated JIRA."
+ print("JIRA_USERNAME and JIRA_PASSWORD not set")
+ print("Exiting without trying to close the associated JIRA.")
else:
- print "Could not find jira-python library. Run 'sudo pip install jira' to install."
- print "Exiting without trying to close the associated JIRA."
+ print("Could not find jira-python library. Run 'sudo pip install jira' to install.")
+ print("Exiting without trying to close the associated JIRA.")
if __name__ == "__main__":
import doctest