aboutsummaryrefslogtreecommitdiff
path: root/dev/create-release
diff options
context:
space:
mode:
author Tathagata Das <tathagata.das1565@gmail.com> 2014-05-22 20:48:55 -0700
committer Tathagata Das <tathagata.das1565@gmail.com> 2014-05-22 20:48:55 -0700
commit b2bdd0e505f1ae3d39c46139f17bd43779ece635 (patch)
tree fd1f6274986c3bf259c0dd3a0adaf6c2cfddc1a1 /dev/create-release
parent cce77457e00aa5f1f4db3d50454cf257efb156ed (diff)
download spark-b2bdd0e505f1ae3d39c46139f17bd43779ece635.tar.gz
spark-b2bdd0e505f1ae3d39c46139f17bd43779ece635.tar.bz2
spark-b2bdd0e505f1ae3d39c46139f17bd43779ece635.zip
Updated scripts for auditing releases
- Added script to automatically generate change list CHANGES.txt
- Added test for verifying linking against maven distributions of `spark-sql` and `spark-hive`
- Added SBT projects for testing functionality of `spark-sql` and `spark-hive`
- Fixed issues in existing tests that might have come up because of changes in Spark 1.0

Author: Tathagata Das <tathagata.das1565@gmail.com>

Closes #844 from tdas/update-dev-scripts and squashes the following commits:

25090ba [Tathagata Das] Added missing license
e2e20b3 [Tathagata Das] Updated tests for auditing releases.
Diffstat (limited to 'dev/create-release')
-rwxr-xr-x dev/create-release/generate-changelist.py 144
1 files changed, 144 insertions, 0 deletions
diff --git a/dev/create-release/generate-changelist.py b/dev/create-release/generate-changelist.py
new file mode 100755
index 0000000000..13b744ec1b
--- /dev/null
+++ b/dev/create-release/generate-changelist.py
@@ -0,0 +1,144 @@
+#!/usr/bin/python
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Creates CHANGES.txt from git history.
+#
+# Usage:
+#   First set NEW_RELEASE_VERSION and PREV_RELEASE_GIT_TAG in this file.
+#   Make sure you have SPARK_HOME set.
+#   $ python generate-changelist.py
+
+
+import os
+import sys
+import subprocess
+import time
+import traceback
+
+SPARK_HOME = os.environ["SPARK_HOME"]
+NEW_RELEASE_VERSION = "1.0.0"
+PREV_RELEASE_GIT_TAG = "v0.9.1"
+
+CHANGELIST = "CHANGES.txt"
+OLD_CHANGELIST = "%s.old" % (CHANGELIST)
+NEW_CHANGELIST = "%s.new" % (CHANGELIST)
+TMP_CHANGELIST = "%s.tmp" % (CHANGELIST)
+
+# date before first PR in TLP Spark repo
+SPARK_REPO_CHANGE_DATE1 = time.strptime("2014-02-26", "%Y-%m-%d")
+# date after last PR in incubator Spark repo
+SPARK_REPO_CHANGE_DATE2 = time.strptime("2014-03-01", "%Y-%m-%d")
+# Threshold PR number that differentiates PRs to TLP
+# and incubator repos
+SPARK_REPO_PR_NUM_THRESH = 200
+
+LOG_FILE_NAME = "changes_%s" % time.strftime("%h_%m_%Y_%I_%M_%S")
+LOG_FILE = open(LOG_FILE_NAME, 'w')
+
+def run_cmd(cmd):
+ try:
+ print >> LOG_FILE, "Running command: %s" % cmd
+ output = subprocess.check_output(cmd, shell=True, stderr=LOG_FILE)
+ print >> LOG_FILE, "Output: %s" % output
+ return output
+ except:
+ traceback.print_exc()
+ cleanup()
+ sys.exit(1)
+
+def append_to_changelist(string):
+ with open(TMP_CHANGELIST, "a") as f:
+ print >> f, string
+
+def cleanup(ask = True):
+ if ask == True:
+ print "OK to delete temporary and log files? (y/N): "
+ response = raw_input()
+ if ask == False or (ask == True and response == "y"):
+ if os.path.isfile(TMP_CHANGELIST):
+ os.remove(TMP_CHANGELIST)
+ if os.path.isfile(OLD_CHANGELIST):
+ os.remove(OLD_CHANGELIST)
+ LOG_FILE.close()
+ os.remove(LOG_FILE_NAME)
+
+print "Generating new %s for Spark release %s" % (CHANGELIST, NEW_RELEASE_VERSION)
+os.chdir(SPARK_HOME)
+if os.path.isfile(TMP_CHANGELIST):
+ os.remove(TMP_CHANGELIST)
+if os.path.isfile(OLD_CHANGELIST):
+ os.remove(OLD_CHANGELIST)
+
+append_to_changelist("Spark Change Log")
+append_to_changelist("----------------")
+append_to_changelist("")
+append_to_changelist("Release %s" % NEW_RELEASE_VERSION)
+append_to_changelist("")
+
+print "Getting commits between tag %s and HEAD" % PREV_RELEASE_GIT_TAG
+hashes = run_cmd("git log %s..HEAD --pretty='%%h'" % PREV_RELEASE_GIT_TAG).split()
+
+print "Getting details of %s commits" % len(hashes)
+for h in hashes:
+ date = run_cmd("git log %s -1 --pretty='%%ad' --date=iso | head -1" % h).strip()
+ subject = run_cmd("git log %s -1 --pretty='%%s' | head -1" % h).strip()
+ body = run_cmd("git log %s -1 --pretty='%%b'" % h)
+ committer = run_cmd("git log %s -1 --pretty='%%cn <%%ce>' | head -1" % h).strip()
+ body_lines = body.split("\n")
+
+ if "Merge pull" in subject:
+ ## Parse old format commit message
+ append_to_changelist(" %s %s" % (h, date))
+ append_to_changelist(" %s" % subject)
+ append_to_changelist(" [%s]" % body_lines[0])
+ append_to_changelist("")
+
+ elif "maven-release" not in subject:
+ ## Parse new format commit message
+ # Get authors from commit message, committer otherwise
+ authors = [committer]
+ if "Author:" in body:
+ authors = [line.split(":")[1].strip() for line in body_lines if "Author:" in line]
+
+ # Generate GitHub PR URL for easy access if possible
+ github_url = ""
+ if "Closes #" in body:
+ pr_num = [line.split()[1].lstrip("#") for line in body_lines if "Closes #" in line][0]
+ github_url = "github.com/apache/spark/pull/%s" % pr_num
+ day = time.strptime(date.split()[0], "%Y-%m-%d")
+ if day < SPARK_REPO_CHANGE_DATE1 or (day < SPARK_REPO_CHANGE_DATE2 and pr_num < SPARK_REPO_PR_NUM_THRESH):
+ github_url = "github.com/apache/incubator-spark/pull/%s" % pr_num
+
+ append_to_changelist(" %s" % subject)
+ append_to_changelist(" %s" % ', '.join(authors))
+ # for author in authors:
+ # append_to_changelist(" %s" % author)
+ append_to_changelist(" %s" % date)
+ if len(github_url) > 0:
+ append_to_changelist(" Commit: %s, %s" % (h, github_url))
+ else:
+ append_to_changelist(" Commit: %s" % h)
+ append_to_changelist("")
+
+# Append old change list
+print "Appending changelist from tag %s" % PREV_RELEASE_GIT_TAG
+run_cmd("git show %s:%s | tail -n +3 >> %s" % (PREV_RELEASE_GIT_TAG, CHANGELIST, TMP_CHANGELIST))
+run_cmd("cp %s %s" % (TMP_CHANGELIST, NEW_CHANGELIST))
+print "New change list generated as %s" % NEW_CHANGELIST
+cleanup(False)
+