Diffstat (limited to 'dev/run-tests.py')
-rwxr-xr-x  dev/run-tests.py  25
1 file changed, 15 insertions(+), 10 deletions(-)
diff --git a/dev/run-tests.py b/dev/run-tests.py
index 8f47728f20..c78a66f6aa 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -29,6 +29,7 @@ from collections import namedtuple
from sparktestsupport import SPARK_HOME, USER_HOME, ERROR_CODES
from sparktestsupport.shellutils import exit_from_command_with_retcode, run_cmd, rm_r, which
+from sparktestsupport.toposort import toposort_flatten, toposort
import sparktestsupport.modules as modules
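
The vendored sparktestsupport.toposort module mirrors the PyPI 'toposort' package: toposort() yields batches of items whose dependencies have already been emitted, and toposort_flatten() collapses those batches into a single list. A minimal sketch of that behavior, using illustrative module names rather than Spark's real dependency graph:

    from toposort import toposort, toposort_flatten  # PyPI 'toposort'; Spark vendors an equivalent

    # Map each item to the set of items it depends on.
    deps = {
        'examples': {'graphx', 'mllib'},  # examples depends on graphx and mllib
        'mllib': {'graphx'},
        'graphx': set(),
    }

    # toposort() yields dependency "levels": each set contains only items
    # whose dependencies appeared in earlier sets.
    print(list(toposort(deps)))               # [{'graphx'}, {'mllib'}, {'examples'}]

    # toposort_flatten() flattens the levels; sort=True orders ties
    # alphabetically within each level, making the result deterministic.
    print(toposort_flatten(deps, sort=True))  # ['graphx', 'mllib', 'examples']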
@@ -43,7 +44,7 @@ def determine_modules_for_files(filenames):
If a file is not associated with a more specific submodule, then this method will consider that
file to belong to the 'root' module.
- >>> sorted(x.name for x in determine_modules_for_files(["python/pyspark/a.py", "sql/test/foo"]))
+ >>> sorted(x.name for x in determine_modules_for_files(["python/pyspark/a.py", "sql/core/foo"]))
['pyspark-core', 'sql']
>>> [x.name for x in determine_modules_for_files(["file_not_matched_by_any_subproject"])]
['root']
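
For context on the doctest fix above: each module owns a set of source-path prefixes, and any file no module claims falls through to 'root'. A rough sketch of that matching, assuming each Module exposes a contains_file(filename) predicate and the module list is exported as all_modules (as in sparktestsupport.modules):

    import sparktestsupport.modules as modules

    def modules_for_file(filename):
        # Collect every module whose source prefixes match this file;
        # fall back to the catch-all 'root' module when none do.
        matched = [m for m in modules.all_modules if m.contains_file(filename)]
        return matched if matched else [modules.root]

The old path 'sql/test/foo' matched no module's prefixes, so it would have resolved to 'root' and the doctest's expected output of ['pyspark-core', 'sql'] was wrong; 'sql/core/foo' actually sits under the sql module's source tree.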
@@ -99,14 +100,16 @@ def determine_modules_to_test(changed_modules):
Given a set of modules that have changed, compute the transitive closure of those modules'
dependent modules in order to determine the set of modules that should be tested.
- >>> sorted(x.name for x in determine_modules_to_test([modules.root]))
+ Returns a topologically-sorted list of modules (ties are broken by sorting on module names).
+
+ >>> [x.name for x in determine_modules_to_test([modules.root])]
['root']
- >>> sorted(x.name for x in determine_modules_to_test([modules.graphx]))
- ['examples', 'graphx']
- >>> x = sorted(x.name for x in determine_modules_to_test([modules.sql]))
+ >>> [x.name for x in determine_modules_to_test([modules.graphx])]
+ ['graphx', 'examples']
+ >>> x = [x.name for x in determine_modules_to_test([modules.sql])]
>>> x # doctest: +NORMALIZE_WHITESPACE
- ['examples', 'hive-thriftserver', 'mllib', 'pyspark-ml', \
- 'pyspark-mllib', 'pyspark-sql', 'sparkr', 'sql']
+ ['sql', 'hive', 'mllib', 'examples', 'hive-thriftserver', 'pyspark-sql', 'sparkr',
+ 'pyspark-mllib', 'pyspark-ml']
"""
# If we're going to have to run all of the tests, then we can just short-circuit
# and return 'root'. No module depends on root, so if it appears then it will be
@@ -116,7 +119,9 @@ def determine_modules_to_test(changed_modules):
modules_to_test = set()
for module in changed_modules:
modules_to_test = modules_to_test.union(determine_modules_to_test(module.dependent_modules))
- return modules_to_test.union(set(changed_modules))
+ modules_to_test = modules_to_test.union(set(changed_modules))
+ return toposort_flatten(
+ {m: set(m.dependencies).intersection(modules_to_test) for m in modules_to_test}, sort=True)
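
The intersection in the dict comprehension keeps the graph closed over modules_to_test: without it, toposort_flatten would be handed dependencies that are not being tested and would emit them too. A hedged illustration with hypothetical module names:

    from toposort import toposort_flatten  # PyPI 'toposort'; Spark vendors an equivalent

    # Suppose only 'hive' and 'hive-thriftserver' are to be tested, even
    # though 'hive' also depends on 'sql' (unchanged, so not re-tested).
    modules_to_test = {'hive', 'hive-thriftserver'}
    all_deps = {'hive': {'sql'}, 'hive-thriftserver': {'hive'}}

    graph = {m: all_deps[m].intersection(modules_to_test) for m in modules_to_test}
    # graph == {'hive': set(), 'hive-thriftserver': {'hive'}}; 'sql' is pruned.
    print(toposort_flatten(graph, sort=True))  # ['hive', 'hive-thriftserver']

Without the intersection, 'sql' would appear as a bare dependency in the graph and toposort_flatten would emit it first, scheduling tests for a module that was never meant to run.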
def determine_tags_to_exclude(changed_modules):
@@ -377,12 +382,12 @@ def run_scala_tests_maven(test_profiles):
def run_scala_tests_sbt(test_modules, test_profiles):
- sbt_test_goals = set(itertools.chain.from_iterable(m.sbt_test_goals for m in test_modules))
+ sbt_test_goals = list(itertools.chain.from_iterable(m.sbt_test_goals for m in test_modules))
if not sbt_test_goals:
return
- profiles_and_goals = test_profiles + list(sbt_test_goals)
+ profiles_and_goals = test_profiles + sbt_test_goals
print("[info] Running Spark tests using SBT with these arguments: ",
" ".join(profiles_and_goals))