diff options
author | Andrew Or <andrew@databricks.com> | 2014-12-16 22:11:03 -0800 |
---|---|---|
committer | Andrew Or <andrew@databricks.com> | 2014-12-16 22:14:18 -0800 |
commit | 4e1112e7b0f52e7f59cc42025f300fa8124eb9b2 (patch) | |
tree | b8538ccf29f539878018573a19182de1f933b13f | |
parent | 60698801ebc4c4947cfc7f46762a7cca2ed40452 (diff) | |
download | spark-4e1112e7b0f52e7f59cc42025f300fa8124eb9b2.tar.gz spark-4e1112e7b0f52e7f59cc42025f300fa8124eb9b2.tar.bz2 spark-4e1112e7b0f52e7f59cc42025f300fa8124eb9b2.zip |
[Release] Update contributors list format and sort it
Additionally, we now warn the user when a duplicate author name
arises, in which case the user needs to resolve it manually.
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | .rat-excludes | 1 | ||||
-rwxr-xr-x | dev/create-release/generate-contributors.py | 8 | ||||
-rwxr-xr-x | dev/create-release/translate-contributors.py | 34 |
4 files changed, 30 insertions, 15 deletions
diff --git a/.gitignore b/.gitignore index 3b9086c718..30b1e12bf1 100644 --- a/.gitignore +++ b/.gitignore @@ -51,7 +51,7 @@ checkpoint derby.log dist/ dev/create-release/*txt -dev/create-release/*new +dev/create-release/*final spark-*-bin-*.tgz unit-tests.log /lib/ diff --git a/.rat-excludes b/.rat-excludes index d8bee1f8e4..1bf97f0f8b 100644 --- a/.rat-excludes +++ b/.rat-excludes @@ -64,3 +64,4 @@ dist/* logs .*scalastyle-output.xml .*dependency-reduced-pom.xml +dev/create-release/known_translations diff --git a/dev/create-release/generate-contributors.py b/dev/create-release/generate-contributors.py index e65c5d8233..8aaa250bd7 100755 --- a/dev/create-release/generate-contributors.py +++ b/dev/create-release/generate-contributors.py @@ -192,9 +192,9 @@ for commit in filtered_commits: print "==================================================================================\n" # Write to contributors file ordered by author names -# Each line takes the format "Author name - semi-colon delimited contributions" -# e.g. Andrew Or - Bug fixes in Windows, Core, and Web UI; improvements in Core -# e.g. Tathagata Das - Bug fixes and new features in Streaming +# Each line takes the format " * Author name -- semi-colon delimited contributions" +# e.g. * Andrew Or -- Bug fixes in Windows, Core, and Web UI; improvements in Core +# e.g. * Tathagata Das -- Bug fixes and new features in Streaming contributors_file = open(contributors_file_name, "w") authors = author_info.keys() authors.sort() @@ -223,7 +223,7 @@ for author in authors: # E.g. andrewor14/SPARK-3425/SPARK-1157/SPARK-6672 if author in invalid_authors and invalid_authors[author]: author = author + "/" + "/".join(invalid_authors[author]) - line = "%s - %s" % (author, contribution) + line = " * %s -- %s" % (author, contribution) contributors_file.write(line + "\n") contributors_file.close() print "Contributors list is successfully written to %s!" 
% contributors_file_name diff --git a/dev/create-release/translate-contributors.py b/dev/create-release/translate-contributors.py index f3b1efdd42..86fa02d87b 100755 --- a/dev/create-release/translate-contributors.py +++ b/dev/create-release/translate-contributors.py @@ -43,14 +43,12 @@ if not JIRA_USERNAME or not JIRA_PASSWORD: if not GITHUB_API_TOKEN: sys.exit("GITHUB_API_TOKEN must be set") -# Write new contributors list to <old_file_name>.new +# Write new contributors list to <old_file_name>.final if not os.path.isfile(contributors_file_name): print "Contributors file %s does not exist!" % contributors_file_name print "Have you run ./generate-contributors.py yet?" sys.exit(1) contributors_file = open(contributors_file_name, "r") -new_contributors_file_name = contributors_file_name + ".new" -new_contributors_file = open(new_contributors_file_name, "w") warnings = [] # In non-interactive mode, this script will choose the first replacement that is valid @@ -73,7 +71,7 @@ known_translations_file_name = "known_translations" known_translations_file = open(known_translations_file_name, "r") for line in known_translations_file: if line.startswith("#"): continue - [old_name, new_name] = line.split(" - ") + [old_name, new_name] = line.strip("\n").split(" - ") known_translations[old_name] = new_name known_translations_file.close() @@ -147,16 +145,16 @@ def generate_candidates(author, issues): # If no such name exists, the original name is used (without the JIRA numbers). 
print "\n========================== Translating contributor list ==========================" lines = contributors_file.readlines() +contributions = [] for i, line in enumerate(lines): - temp_author = line.split(" - ")[0] + temp_author = line.strip(" * ").split(" -- ")[0] print "Processing author %s (%d/%d)" % (temp_author, i + 1, len(lines)) if not temp_author: - error_msg = " ERROR: Expected the following format <author> - <contributions>\n" + error_msg = " ERROR: Expected the following format \" * <author> -- <contributions>\"\n" error_msg += " ERROR: Actual = %s" % line print error_msg warnings.append(error_msg) - new_contributors_file.write(line) - new_contributors_file.flush() + contributions.append(line) continue author = temp_author.split("/")[0] # Use the local copy of known translations where possible @@ -222,10 +220,26 @@ for i, line in enumerate(lines): known_translations_file.write("%s - %s\n" % (author, new_author)) known_translations_file.flush() line = line.replace(temp_author, author) - new_contributors_file.write(line) - new_contributors_file.flush() + contributions.append(line) print "==================================================================================\n" contributors_file.close() +known_translations_file.close() + +# Sort the contributions before writing them to the new file. +# Additionally, check if there are any duplicate author rows. +# This could happen if the same user has both a valid full +# name (e.g. Andrew Or) and an invalid one (andrewor14). +# If so, warn the user about this at the end. +contributions.sort() +all_authors = set() +new_contributors_file_name = contributors_file_name + ".final" +new_contributors_file = open(new_contributors_file_name, "w") +for line in contributions: + author = line.strip(" * ").split(" -- ")[0] + if author in all_authors: + warnings.append("Detected duplicate author name %s. Please merge these manually." 
% author) + all_authors.add(author) + new_contributors_file.write(line) new_contributors_file.close() print "Translated contributors list successfully written to %s!" % new_contributors_file_name |