diff options
author | Andrew Or <andrewor14@gmail.com> | 2014-11-13 00:30:58 +0000 |
---|---|---|
committer | Andrew Or <andrew@databricks.com> | 2014-11-12 16:35:10 -0800 |
commit | 675df2afd496a4bd1a48b77c0aaef2e461d5b145 (patch) | |
tree | 15bc31edd946feeb2b23f42425b628ee725af121 /dev/audit-release/audit_release.py | |
parent | 44f67ac7d0b9bd92c6516320fdfada8f3a7856bd (diff) | |
download | spark-675df2afd496a4bd1a48b77c0aaef2e461d5b145.tar.gz spark-675df2afd496a4bd1a48b77c0aaef2e461d5b145.tar.bz2 spark-675df2afd496a4bd1a48b77c0aaef2e461d5b145.zip |
[Release] Bring audit scripts up-to-date
This involves a few main changes:
- Log all output messages to the log file. Previously the log file
was not useful because it did not indicate progress.
- Remove hive-site.xml in sbt_hive_app to avoid interference
- Add the appropriate repositories for new dependencies
Diffstat (limited to 'dev/audit-release/audit_release.py')
-rwxr-xr-x | dev/audit-release/audit_release.py | 143 |
1 file changed, 71 insertions, 72 deletions
diff --git a/dev/audit-release/audit_release.py b/dev/audit-release/audit_release.py index 16ea1a7129..0b7069f6e1 100755 --- a/dev/audit-release/audit_release.py +++ b/dev/audit-release/audit_release.py @@ -30,71 +30,84 @@ import sys import time import urllib2 -# Fill in release details here: -RELEASE_URL = "http://people.apache.org/~pwendell/spark-1.0.0-rc1/" -RELEASE_KEY = "9E4FE3AF" -RELEASE_REPOSITORY = "https://repository.apache.org/content/repositories/orgapachespark-1006/" -RELEASE_VERSION = "1.0.0" +# Note: The following variables must be set before use! +RELEASE_URL = "http://people.apache.org/~andrewor14/spark-1.1.1-rc1/" +RELEASE_KEY = "XXXXXXXX" # Your 8-digit hex +RELEASE_REPOSITORY = "https://repository.apache.org/content/repositories/orgapachespark-1033" +RELEASE_VERSION = "1.1.1" SCALA_VERSION = "2.10.4" SCALA_BINARY_VERSION = "2.10" -# +# Do not set these LOG_FILE_NAME = "spark_audit_%s" % time.strftime("%h_%m_%Y_%I_%M_%S") LOG_FILE = open(LOG_FILE_NAME, 'w') WORK_DIR = "/tmp/audit_%s" % int(time.time()) MAVEN_CMD = "mvn" GPG_CMD = "gpg" +SBT_CMD = "sbt -Dsbt.log.noformat=true" -print "Starting tests, log output in %s. Test results printed below:" % LOG_FILE_NAME - -# Track failures +# Track failures to print them at the end failures = [] +# Log a message. Use sparingly because this flushes every write. +def log(msg): + LOG_FILE.write(msg + "\n") + LOG_FILE.flush() +def log_and_print(msg): + print msg + log(msg) + +# Prompt the user to delete the scratch directory used def clean_work_files(): - print "OK to delete scratch directory '%s'? (y/N): " % WORK_DIR - response = raw_input() + response = raw_input("OK to delete scratch directory '%s'? (y/N) " % WORK_DIR) if response == "y": shutil.rmtree(WORK_DIR) - print "Should I delete the log output file '%s'? 
(y/N): " % LOG_FILE_NAME - response = raw_input() - if response == "y": - os.unlink(LOG_FILE_NAME) - +# Run the given command and log its output to the log file def run_cmd(cmd, exit_on_failure=True): - print >> LOG_FILE, "Running command: %s" % cmd + log("Running command: %s" % cmd) ret = subprocess.call(cmd, shell=True, stdout=LOG_FILE, stderr=LOG_FILE) if ret != 0 and exit_on_failure: - print "Command failed: %s" % cmd + log_and_print("Command failed: %s" % cmd) clean_work_files() sys.exit(-1) return ret - def run_cmd_with_output(cmd): - print >> sys.stderr, "Running command: %s" % cmd + log_and_print("Running command: %s" % cmd) return subprocess.check_output(cmd, shell=True, stderr=LOG_FILE) +# Test if the given condition is successful +# If so, print the pass message; otherwise print the failure message +def test(cond, msg): + return passed(msg) if cond else failed(msg) -def test(bool, str): - if bool: - return passed(str) - failed(str) - - -def passed(str): - print "[PASSED] %s" % str - - -def failed(str): - failures.append(str) - print "[**FAILED**] %s" % str +def passed(msg): + log_and_print("[PASSED] %s" % msg) +def failed(msg): + failures.append(msg) + log_and_print("[**FAILED**] %s" % msg) def get_url(url): return urllib2.urlopen(url).read() +# If the path exists, prompt the user to delete it +# If the resource is not deleted, abort +def ensure_path_not_present(path): + full_path = os.path.expanduser(path) + if os.path.exists(full_path): + print "Found %s locally." % full_path + response = raw_input("This can interfere with testing published artifacts. OK to delete? (y/N) ") + if response == "y": + shutil.rmtree(full_path) + else: + print "Abort." 
+ sys.exit(-1) + +log_and_print("|-------- Starting Spark audit tests for release %s --------|" % RELEASE_VERSION) +log_and_print("Log output can be found in %s" % LOG_FILE_NAME) original_dir = os.getcwd() @@ -114,37 +127,36 @@ local_ivy_spark = "~/.ivy2/local/org.apache.spark" cache_ivy_spark = "~/.ivy2/cache/org.apache.spark" local_maven_kafka = "~/.m2/repository/org/apache/kafka" local_maven_kafka = "~/.m2/repository/org/apache/spark" - - -def ensure_path_not_present(x): - if os.path.exists(os.path.expanduser(x)): - print "Please remove %s, it can interfere with testing published artifacts." % x - sys.exit(-1) - map(ensure_path_not_present, [local_ivy_spark, cache_ivy_spark, local_maven_kafka]) # SBT build tests +log_and_print("==== Building SBT modules ====") os.chdir("blank_sbt_build") os.environ["SPARK_VERSION"] = RELEASE_VERSION os.environ["SCALA_VERSION"] = SCALA_VERSION os.environ["SPARK_RELEASE_REPOSITORY"] = RELEASE_REPOSITORY os.environ["SPARK_AUDIT_MASTER"] = "local" for module in modules: + log("==== Building module %s in SBT ====" % module) os.environ["SPARK_MODULE"] = module - ret = run_cmd("sbt clean update", exit_on_failure=False) - test(ret == 0, "sbt build against '%s' module" % module) + ret = run_cmd("%s clean update" % SBT_CMD, exit_on_failure=False) + test(ret == 0, "SBT build against '%s' module" % module) os.chdir(original_dir) # SBT application tests +log_and_print("==== Building SBT applications ====") for app in ["sbt_app_core", "sbt_app_graphx", "sbt_app_streaming", "sbt_app_sql", "sbt_app_hive", "sbt_app_kinesis"]: + log("==== Building application %s in SBT ====" % app) os.chdir(app) - ret = run_cmd("sbt clean run", exit_on_failure=False) - test(ret == 0, "sbt application (%s)" % app) + ret = run_cmd("%s clean run" % SBT_CMD, exit_on_failure=False) + test(ret == 0, "SBT application (%s)" % app) os.chdir(original_dir) # Maven build tests os.chdir("blank_maven_build") +log_and_print("==== Building Maven modules ====") for module in 
modules: + log("==== Building module %s in maven ====" % module) cmd = ('%s --update-snapshots -Dspark.release.repository="%s" -Dspark.version="%s" ' '-Dspark.module="%s" clean compile' % (MAVEN_CMD, RELEASE_REPOSITORY, RELEASE_VERSION, module)) @@ -152,6 +164,8 @@ for module in modules: test(ret == 0, "maven build against '%s' module" % module) os.chdir(original_dir) +# Maven application tests +log_and_print("==== Building Maven applications ====") os.chdir("maven_app_core") mvn_exec_cmd = ('%s --update-snapshots -Dspark.release.repository="%s" -Dspark.version="%s" ' '-Dscala.binary.version="%s" clean compile ' @@ -172,15 +186,14 @@ index_page = get_url(RELEASE_URL) artifact_regex = r = re.compile("<a href=\"(.*.tgz)\">") artifacts = r.findall(index_page) +# Verify artifact integrity for artifact in artifacts: - print "==== Verifying download integrity for artifact: %s ====" % artifact + log_and_print("==== Verifying download integrity for artifact: %s ====" % artifact) artifact_url = "%s/%s" % (RELEASE_URL, artifact) - run_cmd("wget %s" % artifact_url) - key_file = "%s.asc" % artifact + run_cmd("wget %s" % artifact_url) run_cmd("wget %s/%s" % (RELEASE_URL, key_file)) - run_cmd("wget %s%s" % (artifact_url, ".sha")) # Verify signature @@ -208,31 +221,17 @@ for artifact in artifacts: os.chdir(WORK_DIR) -for artifact in artifacts: - print "==== Verifying build and tests for artifact: %s ====" % artifact - os.chdir(os.path.join(WORK_DIR, dir_name)) - - os.environ["MAVEN_OPTS"] = "-Xmx3g -XX:MaxPermSize=1g -XX:ReservedCodeCacheSize=1g" - # Verify build - print "==> Running build" - run_cmd("sbt assembly") - passed("sbt build successful") - run_cmd("%s package -DskipTests" % MAVEN_CMD) - passed("Maven build successful") - - # Verify tests - print "==> Performing unit tests" - run_cmd("%s test" % MAVEN_CMD) - passed("Tests successful") - os.chdir(WORK_DIR) - -clean_work_files() - +# Report result +log_and_print("\n") if len(failures) == 0: - print "ALL TESTS PASSED" + 
log_and_print("*** ALL TESTS PASSED ***") else: - print "SOME TESTS DID NOT PASS" + log_and_print("XXXXX SOME TESTS DID NOT PASS XXXXX") for f in failures: - print f - + log_and_print(" %s" % f) os.chdir(original_dir) + +# Clean up +clean_work_files() + +log_and_print("|-------- Spark release audit complete --------|") |