Diffstat (limited to 'dev/run-tests.py')
-rwxr-xr-x  dev/run-tests.py  25
1 file changed, 15 insertions(+), 10 deletions(-)
diff --git a/dev/run-tests.py b/dev/run-tests.py
index 8f47728f20..c78a66f6aa 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -29,6 +29,7 @@ from collections import namedtuple
from sparktestsupport import SPARK_HOME, USER_HOME, ERROR_CODES
from sparktestsupport.shellutils import exit_from_command_with_retcode, run_cmd, rm_r, which
+from sparktestsupport.toposort import toposort_flatten, toposort
import sparktestsupport.modules as modules
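
The vendored sparktestsupport.toposort module mirrors the PyPI 'toposort' package: toposort() yields batches of items whose dependencies have already been emitted, and toposort_flatten() collapses those batches into a single list. A minimal sketch of that behavior, using illustrative module names rather than Spark's real dependency graph:

    from toposort import toposort, toposort_flatten  # PyPI 'toposort'; Spark vendors an equivalent

    # Map each item to the set of items it depends on.
    deps = {
        'examples': {'graphx', 'mllib'},  # examples depends on graphx and mllib
        'mllib': {'graphx'},
        'graphx': set(),
    }

    # toposort() yields dependency "levels": each set contains only items
    # whose dependencies appeared in earlier sets.
    print(list(toposort(deps)))               # [{'graphx'}, {'mllib'}, {'examples'}]

    # toposort_flatten() flattens the levels; sort=True orders ties
    # alphabetically within each level, making the result deterministic.
    print(toposort_flatten(deps, sort=True))  # ['graphx', 'mllib', 'examples']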
@@ -43,7 +44,7 @@ def determine_modules_for_files(filenames):
If a file is not associated with a more specific submodule, then this method will consider that
file to belong to the 'root' module.
- >>> sorted(x.name for x in determine_modules_for_files(["python/pyspark/a.py", "sql/test/foo"]))
+ >>> sorted(x.name for x in determine_modules_for_files(["python/pyspark/a.py", "sql/core/foo"]))
['pyspark-core', 'sql']
>>> [x.name for x in determine_modules_for_files(["file_not_matched_by_any_subproject"])]
['root']
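
For context on the doctest fix above: each module owns a set of source-path prefixes, and any file no module claims falls through to 'root'. A rough sketch of that matching, assuming each Module exposes a contains_file(filename) predicate and the module list is exported as all_modules (as in sparktestsupport.modules):

    import sparktestsupport.modules as modules

    def modules_for_file(filename):
        # Collect every module whose source prefixes match this file;
        # fall back to the catch-all 'root' module when none do.
        matched = [m for m in modules.all_modules if m.contains_file(filename)]
        return matched if matched else [modules.root]

The old path 'sql/test/foo' matched no module's prefixes, so it would have resolved to 'root' and the doctest's expected output of ['pyspark-core', 'sql'] was wrong; 'sql/core/foo' actually sits under the sql module's source tree.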
@@ -99,14 +100,16 @@ def determine_modules_to_test(changed_modules):
Given a set of modules that have changed, compute the transitive closure of those modules'
dependent modules in order to determine the set of modules that should be tested.
- >>> sorted(x.name for x in determine_modules_to_test([modules.root]))
+ Returns a topologically-sorted list of modules (ties are broken by sorting on module names).
+
+ >>> [x.name for x in determine_modules_to_test([modules.root])]
['root']
- >>> sorted(x.name for x in determine_modules_to_test([modules.graphx]))
- ['examples', 'graphx']
- >>> x = sorted(x.name for x in determine_modules_to_test([modules.sql]))
+ >>> [x.name for x in determine_modules_to_test([modules.graphx])]
+ ['graphx', 'examples']
+ >>> x = [x.name for x in determine_modules_to_test([modules.sql])]
>>> x # doctest: +NORMALIZE_WHITESPACE
- ['examples', 'hive-thriftserver', 'mllib', 'pyspark-ml', \
- 'pyspark-mllib', 'pyspark-sql', 'sparkr', 'sql']
+ ['sql', 'hive', 'mllib', 'examples', 'hive-thriftserver', 'pyspark-sql', 'sparkr',
+ 'pyspark-mllib', 'pyspark-ml']
"""
# If we're going to have to run all of the tests, then we can just short-circuit
# and return 'root'. No module depends on root, so if it appears then it will be
@@ -116,7 +119,9 @@ def determine_modules_to_test(changed_modules):
modules_to_test = set()
for module in changed_modules:
modules_to_test = modules_to_test.union(determine_modules_to_test(module.dependent_modules))
- return modules_to_test.union(set(changed_modules))
+ modules_to_test = modules_to_test.union(set(changed_modules))
+ return toposort_flatten(
+ {m: set(m.dependencies).intersection(modules_to_test) for m in modules_to_test}, sort=True)
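
The intersection in the dict comprehension keeps the graph closed over modules_to_test: without it, toposort_flatten would be handed dependencies that are not being tested and would emit them too. A hedged illustration with hypothetical module names:

    from toposort import toposort_flatten  # PyPI 'toposort'; Spark vendors an equivalent

    # Suppose only 'hive' and 'hive-thriftserver' are to be tested, even
    # though 'hive' also depends on 'sql' (unchanged, so not re-tested).
    modules_to_test = {'hive', 'hive-thriftserver'}
    all_deps = {'hive': {'sql'}, 'hive-thriftserver': {'hive'}}

    graph = {m: all_deps[m].intersection(modules_to_test) for m in modules_to_test}
    # graph == {'hive': set(), 'hive-thriftserver': {'hive'}}; 'sql' is pruned.
    print(toposort_flatten(graph, sort=True))  # ['hive', 'hive-thriftserver']

Without the intersection, 'sql' would appear as a bare dependency in the graph and toposort_flatten would emit it first, scheduling tests for a module that was never meant to run.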
def determine_tags_to_exclude(changed_modules):
@@ -377,12 +382,12 @@ def run_scala_tests_maven(test_profiles):
def run_scala_tests_sbt(test_modules, test_profiles):
- sbt_test_goals = set(itertools.chain.from_iterable(m.sbt_test_goals for m in test_modules))
+ sbt_test_goals = list(itertools.chain.from_iterable(m.sbt_test_goals for m in test_modules))
if not sbt_test_goals:
return
- profiles_and_goals = test_profiles + list(sbt_test_goals)
+ profiles_and_goals = test_profiles + sbt_test_goals
print("[info] Running Spark tests using SBT with these arguments: ",
" ".join(profiles_and_goals))