author     Josh Rosen <joshrosen@databricks.com>    2015-12-30 12:47:42 -0800
committer  Josh Rosen <joshrosen@databricks.com>    2015-12-30 12:47:42 -0800
commit     27a42c7108ced48a7f558990de2e4fc7ed340119 (patch)
tree       e65525f7dee5ceae053643ac3f5e8b4a1716272b /dev
parent     d1ca634db4ca9db7f0ba7ca38a0e03bcbfec23c9 (diff)
[SPARK-10359] Enumerate dependencies in a file and diff against it for new pull requests
This patch adds a new build check which enumerates Spark's resolved runtime classpath and saves it to a file, then diffs against that file to detect whether pull requests have introduced dependency changes. The aim of this check is to make it simpler to reason about whether pull requests which modify the build have introduced new dependencies or changed transitive dependencies in a way that affects the final classpath.

This supplants the checks added in SPARK-4123 / #5093, which are currently disabled due to bugs.

This patch is based on pwendell's work in #8531. Closes #8531.

Author: Josh Rosen <joshrosen@databricks.com>
Author: Patrick Wendell <patrick@databricks.com>

Closes #10461 from JoshRosen/SPARK-10359.
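For readers skimming the patch, here is a minimal, hypothetical sketch of the core technique, separate from the actual scripts in this change: load the checked-in manifest and a freshly generated dependency listing, and fail if the sets of jar names differ. The dev/deps and dev/pr-deps paths match the layout this patch introduces; everything else is illustrative.

from pathlib import Path


def load_manifest(path):
    # One jar file name per line; ignore blank lines.
    return {line.strip() for line in Path(path).read_text().splitlines() if line.strip()}


def diff_manifests(expected_path, actual_path):
    expected, actual = load_manifest(expected_path), load_manifest(actual_path)
    return sorted(actual - expected), sorted(expected - actual)


added, removed = diff_manifests("dev/deps/spark-deps-hadoop-2.3",     # checked-in manifest
                                "dev/pr-deps/spark-deps-hadoop-2.3")  # freshly generated
for jar in added:
    print("added:   " + jar)
for jar in removed:
    print("removed: " + jar)
if added or removed:
    raise SystemExit(1)  # dependency changes detected; fail the check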
Diffstat (limited to 'dev')
-rw-r--r--  dev/deps/spark-deps-hadoop-2.3    | 184
-rw-r--r--  dev/deps/spark-deps-hadoop-2.4    | 185
-rwxr-xr-x  dev/run-tests-jenkins.py          |   2
-rwxr-xr-x  dev/run-tests.py                  |   8
-rw-r--r--  dev/sparktestsupport/__init__.py  |   1
-rw-r--r--  dev/sparktestsupport/modules.py   |  15
-rwxr-xr-x  dev/test-dependencies.sh          | 102
-rwxr-xr-x  dev/tests/pr_new_dependencies.sh  | 117
8 files changed, 494 insertions(+), 120 deletions(-)
diff --git a/dev/deps/spark-deps-hadoop-2.3 b/dev/deps/spark-deps-hadoop-2.3
new file mode 100644
index 0000000000..6014d50c6b
--- /dev/null
+++ b/dev/deps/spark-deps-hadoop-2.3
@@ -0,0 +1,184 @@
+JavaEWAH-0.3.2.jar
+RoaringBitmap-0.5.11.jar
+ST4-4.0.4.jar
+activation-1.1.1.jar
+akka-actor_2.10-2.3.11.jar
+akka-remote_2.10-2.3.11.jar
+akka-slf4j_2.10-2.3.11.jar
+antlr-2.7.7.jar
+antlr-runtime-3.4.jar
+aopalliance-1.0.jar
+apache-log4j-extras-1.2.17.jar
+arpack_combined_all-0.1.jar
+asm-3.1.jar
+asm-commons-3.1.jar
+asm-tree-3.1.jar
+avro-1.7.7.jar
+avro-ipc-1.7.7-tests.jar
+avro-ipc-1.7.7.jar
+avro-mapred-1.7.7-hadoop2.jar
+base64-2.3.8.jar
+bcprov-jdk15on-1.51.jar
+bonecp-0.8.0.RELEASE.jar
+breeze-macros_2.10-0.11.2.jar
+breeze_2.10-0.11.2.jar
+calcite-avatica-1.2.0-incubating.jar
+calcite-core-1.2.0-incubating.jar
+calcite-linq4j-1.2.0-incubating.jar
+chill-java-0.5.0.jar
+chill_2.10-0.5.0.jar
+commons-beanutils-1.7.0.jar
+commons-beanutils-core-1.8.0.jar
+commons-cli-1.2.jar
+commons-codec-1.10.jar
+commons-collections-3.2.2.jar
+commons-compiler-2.7.6.jar
+commons-compress-1.4.1.jar
+commons-configuration-1.6.jar
+commons-dbcp-1.4.jar
+commons-digester-1.8.jar
+commons-httpclient-3.1.jar
+commons-io-2.4.jar
+commons-lang-2.6.jar
+commons-lang3-3.3.2.jar
+commons-logging-1.1.3.jar
+commons-math3-3.4.1.jar
+commons-net-2.2.jar
+commons-pool-1.5.4.jar
+compress-lzf-1.0.3.jar
+config-1.2.1.jar
+core-1.1.2.jar
+curator-client-2.4.0.jar
+curator-framework-2.4.0.jar
+curator-recipes-2.4.0.jar
+datanucleus-api-jdo-3.2.6.jar
+datanucleus-core-3.2.10.jar
+datanucleus-rdbms-3.2.9.jar
+derby-10.10.1.1.jar
+eigenbase-properties-1.1.5.jar
+geronimo-annotation_1.0_spec-1.1.1.jar
+geronimo-jaspic_1.0_spec-1.0.jar
+geronimo-jta_1.1_spec-1.1.1.jar
+groovy-all-2.1.6.jar
+guice-3.0.jar
+guice-servlet-3.0.jar
+hadoop-annotations-2.3.0.jar
+hadoop-auth-2.3.0.jar
+hadoop-client-2.3.0.jar
+hadoop-common-2.3.0.jar
+hadoop-hdfs-2.3.0.jar
+hadoop-mapreduce-client-app-2.3.0.jar
+hadoop-mapreduce-client-common-2.3.0.jar
+hadoop-mapreduce-client-core-2.3.0.jar
+hadoop-mapreduce-client-jobclient-2.3.0.jar
+hadoop-mapreduce-client-shuffle-2.3.0.jar
+hadoop-yarn-api-2.3.0.jar
+hadoop-yarn-client-2.3.0.jar
+hadoop-yarn-common-2.3.0.jar
+hadoop-yarn-server-common-2.3.0.jar
+hadoop-yarn-server-web-proxy-2.3.0.jar
+httpclient-4.3.2.jar
+httpcore-4.3.2.jar
+ivy-2.4.0.jar
+jackson-annotations-2.4.4.jar
+jackson-core-2.4.4.jar
+jackson-core-asl-1.9.13.jar
+jackson-databind-2.4.4.jar
+jackson-jaxrs-1.9.13.jar
+jackson-mapper-asl-1.9.13.jar
+jackson-module-scala_2.10-2.4.4.jar
+jackson-xc-1.9.13.jar
+janino-2.7.8.jar
+jansi-1.4.jar
+java-xmlbuilder-1.0.jar
+javax.inject-1.jar
+javax.servlet-3.0.0.v201112011016.jar
+javolution-5.5.1.jar
+jaxb-api-2.2.2.jar
+jaxb-impl-2.2.3-1.jar
+jcl-over-slf4j-1.7.10.jar
+jdo-api-3.0.1.jar
+jersey-core-1.9.jar
+jersey-guice-1.9.jar
+jersey-json-1.9.jar
+jersey-server-1.9.jar
+jets3t-0.9.3.jar
+jettison-1.1.jar
+jetty-6.1.26.jar
+jetty-all-7.6.0.v20120127.jar
+jetty-util-6.1.26.jar
+jline-2.10.5.jar
+jline-2.12.jar
+joda-time-2.9.jar
+jodd-core-3.5.2.jar
+jpam-1.1.jar
+json-20090211.jar
+json4s-ast_2.10-3.2.10.jar
+json4s-core_2.10-3.2.10.jar
+json4s-jackson_2.10-3.2.10.jar
+jsr305-1.3.9.jar
+jta-1.1.jar
+jtransforms-2.4.0.jar
+jul-to-slf4j-1.7.10.jar
+kryo-2.21.jar
+leveldbjni-all-1.8.jar
+libfb303-0.9.2.jar
+libthrift-0.9.2.jar
+log4j-1.2.17.jar
+lz4-1.3.0.jar
+mail-1.4.7.jar
+mesos-0.21.1-shaded-protobuf.jar
+metrics-core-3.1.2.jar
+metrics-graphite-3.1.2.jar
+metrics-json-3.1.2.jar
+metrics-jvm-3.1.2.jar
+minlog-1.2.jar
+mx4j-3.0.2.jar
+netty-3.8.0.Final.jar
+netty-all-4.0.29.Final.jar
+objenesis-1.2.jar
+opencsv-2.3.jar
+oro-2.0.8.jar
+paranamer-2.6.jar
+parquet-column-1.7.0.jar
+parquet-common-1.7.0.jar
+parquet-encoding-1.7.0.jar
+parquet-format-2.3.0-incubating.jar
+parquet-generator-1.7.0.jar
+parquet-hadoop-1.7.0.jar
+parquet-hadoop-bundle-1.6.0.jar
+parquet-jackson-1.7.0.jar
+pmml-agent-1.2.7.jar
+pmml-model-1.2.7.jar
+pmml-schema-1.2.7.jar
+protobuf-java-2.5.0.jar
+py4j-0.9.jar
+pyrolite-4.9.jar
+quasiquotes_2.10-2.0.0-M8.jar
+reflectasm-1.07-shaded.jar
+scala-compiler-2.10.5.jar
+scala-library-2.10.5.jar
+scala-reflect-2.10.5.jar
+scalap-2.10.5.jar
+servlet-api-2.5.jar
+slf4j-api-1.7.10.jar
+slf4j-log4j12-1.7.10.jar
+snappy-0.2.jar
+snappy-java-1.1.2.jar
+spire-macros_2.10-0.7.4.jar
+spire_2.10-0.7.4.jar
+stax-api-1.0-2.jar
+stax-api-1.0.1.jar
+stream-2.7.0.jar
+stringtemplate-3.2.1.jar
+super-csv-2.2.0.jar
+tachyon-client-0.8.2.jar
+tachyon-underfs-hdfs-0.8.2.jar
+tachyon-underfs-local-0.8.2.jar
+tachyon-underfs-s3-0.8.2.jar
+uncommons-maths-1.2.2a.jar
+unused-1.0.0.jar
+xbean-asm5-shaded-4.4.jar
+xmlenc-0.52.jar
+xz-1.0.jar
+zookeeper-3.4.5.jar
diff --git a/dev/deps/spark-deps-hadoop-2.4 b/dev/deps/spark-deps-hadoop-2.4
new file mode 100644
index 0000000000..f56e6f4393
--- /dev/null
+++ b/dev/deps/spark-deps-hadoop-2.4
@@ -0,0 +1,185 @@
+JavaEWAH-0.3.2.jar
+RoaringBitmap-0.5.11.jar
+ST4-4.0.4.jar
+activation-1.1.1.jar
+akka-actor_2.10-2.3.11.jar
+akka-remote_2.10-2.3.11.jar
+akka-slf4j_2.10-2.3.11.jar
+antlr-2.7.7.jar
+antlr-runtime-3.4.jar
+aopalliance-1.0.jar
+apache-log4j-extras-1.2.17.jar
+arpack_combined_all-0.1.jar
+asm-3.1.jar
+asm-commons-3.1.jar
+asm-tree-3.1.jar
+avro-1.7.7.jar
+avro-ipc-1.7.7-tests.jar
+avro-ipc-1.7.7.jar
+avro-mapred-1.7.7-hadoop2.jar
+base64-2.3.8.jar
+bcprov-jdk15on-1.51.jar
+bonecp-0.8.0.RELEASE.jar
+breeze-macros_2.10-0.11.2.jar
+breeze_2.10-0.11.2.jar
+calcite-avatica-1.2.0-incubating.jar
+calcite-core-1.2.0-incubating.jar
+calcite-linq4j-1.2.0-incubating.jar
+chill-java-0.5.0.jar
+chill_2.10-0.5.0.jar
+commons-beanutils-1.7.0.jar
+commons-beanutils-core-1.8.0.jar
+commons-cli-1.2.jar
+commons-codec-1.10.jar
+commons-collections-3.2.2.jar
+commons-compiler-2.7.6.jar
+commons-compress-1.4.1.jar
+commons-configuration-1.6.jar
+commons-dbcp-1.4.jar
+commons-digester-1.8.jar
+commons-httpclient-3.1.jar
+commons-io-2.4.jar
+commons-lang-2.6.jar
+commons-lang3-3.3.2.jar
+commons-logging-1.1.3.jar
+commons-math3-3.4.1.jar
+commons-net-2.2.jar
+commons-pool-1.5.4.jar
+compress-lzf-1.0.3.jar
+config-1.2.1.jar
+core-1.1.2.jar
+curator-client-2.4.0.jar
+curator-framework-2.4.0.jar
+curator-recipes-2.4.0.jar
+datanucleus-api-jdo-3.2.6.jar
+datanucleus-core-3.2.10.jar
+datanucleus-rdbms-3.2.9.jar
+derby-10.10.1.1.jar
+eigenbase-properties-1.1.5.jar
+geronimo-annotation_1.0_spec-1.1.1.jar
+geronimo-jaspic_1.0_spec-1.0.jar
+geronimo-jta_1.1_spec-1.1.1.jar
+groovy-all-2.1.6.jar
+guice-3.0.jar
+guice-servlet-3.0.jar
+hadoop-annotations-2.4.0.jar
+hadoop-auth-2.4.0.jar
+hadoop-client-2.4.0.jar
+hadoop-common-2.4.0.jar
+hadoop-hdfs-2.4.0.jar
+hadoop-mapreduce-client-app-2.4.0.jar
+hadoop-mapreduce-client-common-2.4.0.jar
+hadoop-mapreduce-client-core-2.4.0.jar
+hadoop-mapreduce-client-jobclient-2.4.0.jar
+hadoop-mapreduce-client-shuffle-2.4.0.jar
+hadoop-yarn-api-2.4.0.jar
+hadoop-yarn-client-2.4.0.jar
+hadoop-yarn-common-2.4.0.jar
+hadoop-yarn-server-common-2.4.0.jar
+hadoop-yarn-server-web-proxy-2.4.0.jar
+httpclient-4.3.2.jar
+httpcore-4.3.2.jar
+ivy-2.4.0.jar
+jackson-annotations-2.4.4.jar
+jackson-core-2.4.4.jar
+jackson-core-asl-1.9.13.jar
+jackson-databind-2.4.4.jar
+jackson-jaxrs-1.9.13.jar
+jackson-mapper-asl-1.9.13.jar
+jackson-module-scala_2.10-2.4.4.jar
+jackson-xc-1.9.13.jar
+janino-2.7.8.jar
+jansi-1.4.jar
+java-xmlbuilder-1.0.jar
+javax.inject-1.jar
+javax.servlet-3.0.0.v201112011016.jar
+javolution-5.5.1.jar
+jaxb-api-2.2.2.jar
+jaxb-impl-2.2.3-1.jar
+jcl-over-slf4j-1.7.10.jar
+jdo-api-3.0.1.jar
+jersey-client-1.9.jar
+jersey-core-1.9.jar
+jersey-guice-1.9.jar
+jersey-json-1.9.jar
+jersey-server-1.9.jar
+jets3t-0.9.3.jar
+jettison-1.1.jar
+jetty-6.1.26.jar
+jetty-all-7.6.0.v20120127.jar
+jetty-util-6.1.26.jar
+jline-2.10.5.jar
+jline-2.12.jar
+joda-time-2.9.jar
+jodd-core-3.5.2.jar
+jpam-1.1.jar
+json-20090211.jar
+json4s-ast_2.10-3.2.10.jar
+json4s-core_2.10-3.2.10.jar
+json4s-jackson_2.10-3.2.10.jar
+jsr305-1.3.9.jar
+jta-1.1.jar
+jtransforms-2.4.0.jar
+jul-to-slf4j-1.7.10.jar
+kryo-2.21.jar
+leveldbjni-all-1.8.jar
+libfb303-0.9.2.jar
+libthrift-0.9.2.jar
+log4j-1.2.17.jar
+lz4-1.3.0.jar
+mail-1.4.7.jar
+mesos-0.21.1-shaded-protobuf.jar
+metrics-core-3.1.2.jar
+metrics-graphite-3.1.2.jar
+metrics-json-3.1.2.jar
+metrics-jvm-3.1.2.jar
+minlog-1.2.jar
+mx4j-3.0.2.jar
+netty-3.8.0.Final.jar
+netty-all-4.0.29.Final.jar
+objenesis-1.2.jar
+opencsv-2.3.jar
+oro-2.0.8.jar
+paranamer-2.6.jar
+parquet-column-1.7.0.jar
+parquet-common-1.7.0.jar
+parquet-encoding-1.7.0.jar
+parquet-format-2.3.0-incubating.jar
+parquet-generator-1.7.0.jar
+parquet-hadoop-1.7.0.jar
+parquet-hadoop-bundle-1.6.0.jar
+parquet-jackson-1.7.0.jar
+pmml-agent-1.2.7.jar
+pmml-model-1.2.7.jar
+pmml-schema-1.2.7.jar
+protobuf-java-2.5.0.jar
+py4j-0.9.jar
+pyrolite-4.9.jar
+quasiquotes_2.10-2.0.0-M8.jar
+reflectasm-1.07-shaded.jar
+scala-compiler-2.10.5.jar
+scala-library-2.10.5.jar
+scala-reflect-2.10.5.jar
+scalap-2.10.5.jar
+servlet-api-2.5.jar
+slf4j-api-1.7.10.jar
+slf4j-log4j12-1.7.10.jar
+snappy-0.2.jar
+snappy-java-1.1.2.jar
+spire-macros_2.10-0.7.4.jar
+spire_2.10-0.7.4.jar
+stax-api-1.0-2.jar
+stax-api-1.0.1.jar
+stream-2.7.0.jar
+stringtemplate-3.2.1.jar
+super-csv-2.2.0.jar
+tachyon-client-0.8.2.jar
+tachyon-underfs-hdfs-0.8.2.jar
+tachyon-underfs-local-0.8.2.jar
+tachyon-underfs-s3-0.8.2.jar
+uncommons-maths-1.2.2a.jar
+unused-1.0.0.jar
+xbean-asm5-shaded-4.4.jar
+xmlenc-0.52.jar
+xz-1.0.jar
+zookeeper-3.4.5.jar
diff --git a/dev/run-tests-jenkins.py b/dev/run-tests-jenkins.py
index 42afca0e52..6501721572 100755
--- a/dev/run-tests-jenkins.py
+++ b/dev/run-tests-jenkins.py
@@ -124,6 +124,7 @@ def run_tests(tests_timeout):
ERROR_CODES["BLOCK_R_STYLE"]: 'R style tests',
ERROR_CODES["BLOCK_DOCUMENTATION"]: 'to generate documentation',
ERROR_CODES["BLOCK_BUILD"]: 'to build',
+ ERROR_CODES["BLOCK_BUILD_TESTS"]: 'build dependency tests',
ERROR_CODES["BLOCK_MIMA"]: 'MiMa tests',
ERROR_CODES["BLOCK_SPARK_UNIT_TESTS"]: 'Spark unit tests',
ERROR_CODES["BLOCK_PYSPARK_UNIT_TESTS"]: 'PySpark unit tests',
@@ -193,7 +194,6 @@ def main():
pr_tests = [
"pr_merge_ability",
"pr_public_classes"
- # DISABLED (pwendell) "pr_new_dependencies"
]
# `bind_message_base` returns a function to generate messages for Github posting
diff --git a/dev/run-tests.py b/dev/run-tests.py
index 6129f87cf8..706e2d141c 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -417,6 +417,11 @@ def run_python_tests(test_modules, parallelism):
run_cmd(command)
+def run_build_tests():
+ set_title_and_block("Running build tests", "BLOCK_BUILD_TESTS")
+ run_cmd([os.path.join(SPARK_HOME, "dev", "test-dependencies.sh")])
+
+
def run_sparkr_tests():
set_title_and_block("Running SparkR tests", "BLOCK_SPARKR_UNIT_TESTS")
@@ -537,6 +542,9 @@ def main():
# if "DOCS" in changed_modules and test_env == "amplab_jenkins":
# build_spark_documentation()
+ if any(m.should_run_build_tests for m in test_modules):
+ run_build_tests()
+
# spark build
build_apache_spark(build_tool, hadoop_version)
diff --git a/dev/sparktestsupport/__init__.py b/dev/sparktestsupport/__init__.py
index 0e8032d133..89015f8c4f 100644
--- a/dev/sparktestsupport/__init__.py
+++ b/dev/sparktestsupport/__init__.py
@@ -32,5 +32,6 @@ ERROR_CODES = {
"BLOCK_PYSPARK_UNIT_TESTS": 19,
"BLOCK_SPARKR_UNIT_TESTS": 20,
"BLOCK_JAVA_STYLE": 21,
+ "BLOCK_BUILD_TESTS": 22,
"BLOCK_TIMEOUT": 124
}
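A hypothetical, simplified illustration of how the two Python changes above fit together: sparktestsupport/__init__.py assigns exit code 22 to the new block, and run-tests-jenkins.py maps that code to the phrase 'build dependency tests', which completes the failure sentence posted back to the pull request. Only the code 22 and its phrase come from this patch; the surrounding wiring here is invented for illustration.

ERROR_CODES = {"BLOCK_BUILD_TESTS": 22, "BLOCK_TIMEOUT": 124}
FAILURE_PHRASES = {22: "build dependency tests"}


def pr_failure_phrase(exit_code):
    # Map a run-tests exit code back to the phrase used in the PR comment.
    return "This patch fails %s." % FAILURE_PHRASES.get(exit_code, "some tests")


print(pr_failure_phrase(ERROR_CODES["BLOCK_BUILD_TESTS"]))
# This patch fails build dependency tests.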
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index d65547e04d..4667b289f5 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -31,7 +31,7 @@ class Module(object):
def __init__(self, name, dependencies, source_file_regexes, build_profile_flags=(), environ={},
sbt_test_goals=(), python_test_goals=(), blacklisted_python_implementations=(),
- test_tags=(), should_run_r_tests=False):
+ test_tags=(), should_run_r_tests=False, should_run_build_tests=False):
"""
Define a new module.
@@ -53,6 +53,7 @@ class Module(object):
:param test_tags A set of tags that will be excluded when running unit tests if the module
is not explicitly changed.
:param should_run_r_tests: If true, changes in this module will trigger all R tests.
+ :param should_run_build_tests: If true, changes in this module will trigger build tests.
"""
self.name = name
self.dependencies = dependencies
@@ -64,6 +65,7 @@ class Module(object):
self.blacklisted_python_implementations = blacklisted_python_implementations
self.test_tags = test_tags
self.should_run_r_tests = should_run_r_tests
+ self.should_run_build_tests = should_run_build_tests
self.dependent_modules = set()
for dep in dependencies:
@@ -394,6 +396,14 @@ docs = Module(
]
)
+build = Module(
+ name="build",
+ dependencies=[],
+ source_file_regexes=[
+ ".*pom.xml",
+ "dev/test-dependencies.sh",
+ ]
+)
ec2 = Module(
name="ec2",
@@ -433,5 +443,6 @@ root = Module(
"test",
],
python_test_goals=list(itertools.chain.from_iterable(m.python_test_goals for m in all_modules)),
- should_run_r_tests=True
+ should_run_r_tests=True,
+ should_run_build_tests=True
)
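The net effect of the modules.py change: any changed file matching the new build module's source_file_regexes (every pom.xml, or dev/test-dependencies.sh itself) now triggers the dependency check, and the root module enables it unconditionally. A simplified sketch of that matching, assuming re.match semantics like those used by the module-selection logic in dev/run-tests.py (the helper name here is hypothetical):

import re

BUILD_SOURCE_REGEXES = [r".*pom.xml", r"dev/test-dependencies.sh"]


def triggers_build_tests(changed_files):
    # True if any changed path matches one of the build module's regexes.
    return any(re.match(regex, path)
               for path in changed_files
               for regex in BUILD_SOURCE_REGEXES)


print(triggers_build_tests(["sql/core/pom.xml"]))    # True
print(triggers_build_tests(["core/src/Foo.scala"]))  # False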
diff --git a/dev/test-dependencies.sh b/dev/test-dependencies.sh
new file mode 100755
index 0000000000..984e29d1be
--- /dev/null
+++ b/dev/test-dependencies.sh
@@ -0,0 +1,102 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+set -e
+
+FWDIR="$(cd "`dirname $0`"/..; pwd)"
+cd "$FWDIR"
+
+# TODO: This would be much nicer to do in SBT, once SBT supports Maven-style resolution.
+
+# NOTE: These should match those in the release publishing script
+HADOOP2_MODULE_PROFILES="-Phive-thriftserver -Pyarn -Phive"
+MVN="build/mvn --force"
+HADOOP_PROFILES=(
+ hadoop-2.3
+ hadoop-2.4
+)
+
+# We'll switch the version to a temp. one, publish POMs using that new version, then switch back to
+# the old version. We need to do this because the `dependency:build-classpath` task needs to
+# resolve Spark's internal submodule dependencies.
+
+# See http://stackoverflow.com/a/3545363 for an explanation of this one-liner:
+OLD_VERSION=$(mvn help:evaluate -Dexpression=project.version|grep -Ev '(^\[|Download\w+:)')
+TEMP_VERSION="spark-$(date +%s | tail -c6)"
+
+function reset_version {
+ # Delete the temporary POMs that we wrote to the local Maven repo:
+ find "$HOME/.m2/" | grep "$TEMP_VERSION" | xargs rm -rf
+
+ # Restore the original version number:
+ $MVN -q versions:set -DnewVersion=$OLD_VERSION -DgenerateBackupPoms=false > /dev/null
+}
+trap reset_version EXIT
+
+$MVN -q versions:set -DnewVersion=$TEMP_VERSION -DgenerateBackupPoms=false > /dev/null
+
+# Generate manifests for each Hadoop profile:
+for HADOOP_PROFILE in "${HADOOP_PROFILES[@]}"; do
+ echo "Performing Maven install for $HADOOP_PROFILE"
+ $MVN $HADOOP2_MODULE_PROFILES -P$HADOOP_PROFILE jar:jar install:install -q \
+ -pl '!assembly' \
+ -pl '!examples' \
+ -pl '!external/flume-assembly' \
+ -pl '!external/kafka-assembly' \
+ -pl '!external/twitter' \
+ -pl '!external/flume' \
+ -pl '!external/mqtt' \
+ -pl '!external/mqtt-assembly' \
+ -pl '!external/zeromq' \
+ -pl '!external/kafka' \
+ -pl '!tags' \
+ -DskipTests
+
+ echo "Generating dependency manifest for $HADOOP_PROFILE"
+ mkdir -p dev/pr-deps
+ $MVN $HADOOP2_MODULE_PROFILES -P$HADOOP_PROFILE dependency:build-classpath -pl assembly \
+ | grep "Building Spark Project Assembly" -A 5 \
+ | tail -n 1 | tr ":" "\n" | rev | cut -d "/" -f 1 | rev | sort \
+ | grep -v spark > dev/pr-deps/spark-deps-$HADOOP_PROFILE
+done
+
+if [[ $@ == **replace-manifest** ]]; then
+ echo "Replacing manifests and creating new files at dev/deps"
+ rm -rf dev/deps
+ mv dev/pr-deps dev/deps
+ exit 0
+fi
+
+for HADOOP_PROFILE in "${HADOOP_PROFILES[@]}"; do
+ set +e
+ dep_diff="$(
+ git diff \
+ --no-index \
+ dev/deps/spark-deps-$HADOOP_PROFILE \
+ dev/pr-deps/spark-deps-$HADOOP_PROFILE \
+ )"
+ set -e
+ if [ "$dep_diff" != "" ]; then
+ echo "Spark's published dependencies DO NOT MATCH the manifest file (dev/spark-deps)."
+ echo "To update the manifest file, run './dev/test-dependencies.sh --replace-manifest'."
+ echo "$dep_diff"
+ rm -rf dev/pr-deps
+ exit 1
+ fi
+done
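Two design points in the script above are worth spelling out. First, the version is temporarily rewritten to a throwaway value so that dependency:build-classpath can resolve Spark's internal submodule dependencies from the local repository, and the EXIT trap guarantees the original version is restored even if a Maven step fails. Second, the grep/tail/tr/rev/cut/rev/sort pipeline boils the Maven output down to a sorted list of jar file names with Spark's own artifacts removed; a rough Python equivalent, for illustration only and with invented sample output, looks like this:

import os

SAMPLE_MVN_OUTPUT = """\
[INFO] Building Spark Project Assembly 2.0.0-SNAPSHOT
[INFO] Dependencies classpath:
/repo/org/apache/avro/avro/1.7.7/avro-1.7.7.jar:/repo/com/esotericsoftware/kryo/kryo/2.21/kryo-2.21.jar:/repo/org/apache/spark/spark-core_2.10/2.0.0/spark-core_2.10-2.0.0.jar
"""


def extract_jar_names(mvn_output):
    lines = mvn_output.splitlines()
    # The classpath itself is printed on the line after "Dependencies classpath:".
    idx = next(i for i, line in enumerate(lines) if "Dependencies classpath:" in line)
    jars = sorted(os.path.basename(entry) for entry in lines[idx + 1].split(":"))
    # Drop Spark's own artifacts, mirroring `grep -v spark`.
    return [jar for jar in jars if "spark" not in jar]


print("\n".join(extract_jar_names(SAMPLE_MVN_OUTPUT)))
# avro-1.7.7.jar
# kryo-2.21.jar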
diff --git a/dev/tests/pr_new_dependencies.sh b/dev/tests/pr_new_dependencies.sh
deleted file mode 100755
index fdfb3c62af..0000000000
--- a/dev/tests/pr_new_dependencies.sh
+++ /dev/null
@@ -1,117 +0,0 @@
-#!/usr/bin/env bash
-
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-#
-# This script follows the base format for testing pull requests against
-# another branch and returning results to be published. More details can be
-# found at dev/run-tests-jenkins.
-#
-# Arg1: The Github Pull Request Actual Commit
-#+ known as `ghprbActualCommit` in `run-tests-jenkins`
-# Arg2: The SHA1 hash
-#+ known as `sha1` in `run-tests-jenkins`
-# Arg3: Current PR Commit Hash
-#+ the PR hash for the current commit
-#
-
-ghprbActualCommit="$1"
-sha1="$2"
-current_pr_head="$3"
-
-MVN_BIN="build/mvn"
-CURR_CP_FILE="my-classpath.txt"
-MASTER_CP_FILE="master-classpath.txt"
-
-# First switch over to the master branch
-git checkout -f master
-# Find and copy all pom.xml files into a *.gate file that we can check
-# against through various `git` changes
-find -name "pom.xml" -exec cp {} {}.gate \;
-# Switch back to the current PR
-git checkout -f "${current_pr_head}"
-
-# Check if any *.pom files from the current branch are different from the master
-difference_q=""
-for p in $(find -name "pom.xml"); do
- [[ -f "${p}" && -f "${p}.gate" ]] && \
- difference_q="${difference_q}$(diff $p.gate $p)"
-done
-
-# If no pom files were changed we can easily say no new dependencies were added
-if [ -z "${difference_q}" ]; then
- echo " * This patch does not change any dependencies."
-else
- # Else we need to manually build spark to determine what, if any, dependencies
- # were added into the Spark assembly jar
- ${MVN_BIN} clean package dependency:build-classpath -DskipTests 2>/dev/null | \
- sed -n -e '/Building Spark Project Assembly/,$p' | \
- grep --context=1 -m 2 "Dependencies classpath:" | \
- head -n 3 | \
- tail -n 1 | \
- tr ":" "\n" | \
- rev | \
- cut -d "/" -f 1 | \
- rev | \
- sort > ${CURR_CP_FILE}
-
- # Checkout the master branch to compare against
- git checkout -f master
-
- ${MVN_BIN} clean package dependency:build-classpath -DskipTests 2>/dev/null | \
- sed -n -e '/Building Spark Project Assembly/,$p' | \
- grep --context=1 -m 2 "Dependencies classpath:" | \
- head -n 3 | \
- tail -n 1 | \
- tr ":" "\n" | \
- rev | \
- cut -d "/" -f 1 | \
- rev | \
- sort > ${MASTER_CP_FILE}
-
- DIFF_RESULTS="`diff ${CURR_CP_FILE} ${MASTER_CP_FILE}`"
-
- if [ -z "${DIFF_RESULTS}" ]; then
- echo " * This patch does not change any dependencies."
- else
- # Pretty print the new dependencies
- added_deps=$(echo "${DIFF_RESULTS}" | grep "<" | cut -d' ' -f2 | awk '{printf " * \`"$1"\`\\n"}')
- removed_deps=$(echo "${DIFF_RESULTS}" | grep ">" | cut -d' ' -f2 | awk '{printf " * \`"$1"\`\\n"}')
- added_deps_text=" * This patch **adds the following new dependencies:**\n${added_deps}"
- removed_deps_text=" * This patch **removes the following dependencies:**\n${removed_deps}"
-
- # Construct the final returned message with proper
- return_mssg=""
- [ -n "${added_deps}" ] && return_mssg="${added_deps_text}"
- if [ -n "${removed_deps}" ]; then
- if [ -n "${return_mssg}" ]; then
- return_mssg="${return_mssg}\n${removed_deps_text}"
- else
- return_mssg="${removed_deps_text}"
- fi
- fi
- echo "${return_mssg}"
- fi
-
- # Remove the files we've left over
- [ -f "${CURR_CP_FILE}" ] && rm -f "${CURR_CP_FILE}"
- [ -f "${MASTER_CP_FILE}" ] && rm -f "${MASTER_CP_FILE}"
-
- # Clean up our mess from the Maven builds just in case
- ${MVN_BIN} clean &>/dev/null
-fi