about | summary | refs | log | tree | commit | diff
path: root/dev
diff options
context:
space:
mode:
author: Andrew Or <andrew@databricks.com> 2014-12-16 22:11:03 -0800
committer: Andrew Or <andrew@databricks.com> 2014-12-16 22:15:09 -0800
commit: beb75aca6656cf2557aae25eac8210947a716d39 (patch)
tree: 07c09aecc93e84681c0cd635560cb0b6542f240c /dev
parent: 8a69ed33f9f21d4cedc24fd2c3f7c2e79628cdf9 (diff)
download: spark-beb75aca6656cf2557aae25eac8210947a716d39.tar.gz
          spark-beb75aca6656cf2557aae25eac8210947a716d39.tar.bz2
          spark-beb75aca6656cf2557aae25eac8210947a716d39.zip
[Release] Update contributors list format and sort it
Additionally, we now warn the user when a duplicate author name arises, in which case he/she needs to resolve it manually.
Diffstat (limited to 'dev')
-rwxr-xr-x  dev/create-release/generate-contributors.py   8
-rwxr-xr-x  dev/create-release/translate-contributors.py  34
2 files changed, 28 insertions, 14 deletions
diff --git a/dev/create-release/generate-contributors.py b/dev/create-release/generate-contributors.py
index e65c5d8233..8aaa250bd7 100755
--- a/dev/create-release/generate-contributors.py
+++ b/dev/create-release/generate-contributors.py
@@ -192,9 +192,9 @@ for commit in filtered_commits:
print "==================================================================================\n"
# Write to contributors file ordered by author names
-# Each line takes the format "Author name - semi-colon delimited contributions"
-# e.g. Andrew Or - Bug fixes in Windows, Core, and Web UI; improvements in Core
-# e.g. Tathagata Das - Bug fixes and new features in Streaming
+# Each line takes the format " * Author name -- semi-colon delimited contributions"
+# e.g. * Andrew Or -- Bug fixes in Windows, Core, and Web UI; improvements in Core
+# e.g. * Tathagata Das -- Bug fixes and new features in Streaming
contributors_file = open(contributors_file_name, "w")
authors = author_info.keys()
authors.sort()
@@ -223,7 +223,7 @@ for author in authors:
# E.g. andrewor14/SPARK-3425/SPARK-1157/SPARK-6672
if author in invalid_authors and invalid_authors[author]:
author = author + "/" + "/".join(invalid_authors[author])
- line = "%s - %s" % (author, contribution)
+ line = " * %s -- %s" % (author, contribution)
contributors_file.write(line + "\n")
contributors_file.close()
print "Contributors list is successfully written to %s!" % contributors_file_name
diff --git a/dev/create-release/translate-contributors.py b/dev/create-release/translate-contributors.py
index f3b1efdd42..86fa02d87b 100755
--- a/dev/create-release/translate-contributors.py
+++ b/dev/create-release/translate-contributors.py
@@ -43,14 +43,12 @@ if not JIRA_USERNAME or not JIRA_PASSWORD:
if not GITHUB_API_TOKEN:
sys.exit("GITHUB_API_TOKEN must be set")
-# Write new contributors list to <old_file_name>.new
+# Write new contributors list to <old_file_name>.final
if not os.path.isfile(contributors_file_name):
print "Contributors file %s does not exist!" % contributors_file_name
print "Have you run ./generate-contributors.py yet?"
sys.exit(1)
contributors_file = open(contributors_file_name, "r")
-new_contributors_file_name = contributors_file_name + ".new"
-new_contributors_file = open(new_contributors_file_name, "w")
warnings = []
# In non-interactive mode, this script will choose the first replacement that is valid
@@ -73,7 +71,7 @@ known_translations_file_name = "known_translations"
known_translations_file = open(known_translations_file_name, "r")
for line in known_translations_file:
if line.startswith("#"): continue
- [old_name, new_name] = line.split(" - ")
+ [old_name, new_name] = line.strip("\n").split(" - ")
known_translations[old_name] = new_name
known_translations_file.close()
@@ -147,16 +145,16 @@ def generate_candidates(author, issues):
# If no such name exists, the original name is used (without the JIRA numbers).
print "\n========================== Translating contributor list =========================="
lines = contributors_file.readlines()
+contributions = []
for i, line in enumerate(lines):
- temp_author = line.split(" - ")[0]
+ temp_author = line.strip(" * ").split(" -- ")[0]
print "Processing author %s (%d/%d)" % (temp_author, i + 1, len(lines))
if not temp_author:
- error_msg = " ERROR: Expected the following format <author> - <contributions>\n"
+ error_msg = " ERROR: Expected the following format \" * <author> -- <contributions>\"\n"
error_msg += " ERROR: Actual = %s" % line
print error_msg
warnings.append(error_msg)
- new_contributors_file.write(line)
- new_contributors_file.flush()
+ contributions.append(line)
continue
author = temp_author.split("/")[0]
# Use the local copy of known translations where possible
@@ -222,10 +220,26 @@ for i, line in enumerate(lines):
known_translations_file.write("%s - %s\n" % (author, new_author))
known_translations_file.flush()
line = line.replace(temp_author, author)
- new_contributors_file.write(line)
- new_contributors_file.flush()
+ contributions.append(line)
print "==================================================================================\n"
contributors_file.close()
+known_translations_file.close()
+
+# Sort the contributions before writing them to the new file.
+# Additionally, check if there are any duplicate author rows.
+# This could happen if the same user has both a valid full
+# name (e.g. Andrew Or) and an invalid one (andrewor14).
+# If so, warn the user about this at the end.
+contributions.sort()
+all_authors = set()
+new_contributors_file_name = contributors_file_name + ".final"
+new_contributors_file = open(new_contributors_file_name, "w")
+for line in contributions:
+ author = line.strip(" * ").split(" -- ")[0]
+ if author in all_authors:
+ warnings.append("Detected duplicate author name %s. Please merge these manually." % author)
+ all_authors.add(author)
+ new_contributors_file.write(line)
new_contributors_file.close()
print "Translated contributors list successfully written to %s!" % new_contributors_file_name