diff options
author | Josh Rosen <joshrosen@databricks.com> | 2016-01-26 14:20:11 -0800 |
---|---|---|
committer | Josh Rosen <joshrosen@databricks.com> | 2016-01-26 14:20:11 -0800 |
commit | ee74498de372b16fe6350e3617e9e6ec87c6ae7b (patch) | |
tree | 0adf34b8e4c9421d79b04988b4c39e8715e6a5f6 /dev/sparktestsupport/modules.py | |
parent | fbf7623d49525e3aa6b08f482afd7ee8118d80cb (diff) | |
download | spark-ee74498de372b16fe6350e3617e9e6ec87c6ae7b.tar.gz spark-ee74498de372b16fe6350e3617e9e6ec87c6ae7b.tar.bz2 spark-ee74498de372b16fe6350e3617e9e6ec87c6ae7b.zip |
[SPARK-8725][PROJECT-INFRA] Test modules in topologically-sorted order in dev/run-tests
This patch improves our `dev/run-tests` script to test modules in a topologically-sorted order based on modules' dependencies. This will help to ensure that bugs in upstream projects are not misattributed to downstream projects because those projects' tests were the first ones to exhibit the failure.
Topological sorting is also useful for shortening the feedback loop when testing pull requests: if I make a change in SQL then the SQL tests should run before MLlib, not after.
In addition, this patch also updates our test module definitions to split `sql` into `catalyst`, `sql`, and `hive` in order to allow more tests to be skipped when changing only `hive/` files.
Author: Josh Rosen <joshrosen@databricks.com>
Closes #10885 from JoshRosen/SPARK-8725.
Diffstat (limited to 'dev/sparktestsupport/modules.py')
-rw-r--r-- | dev/sparktestsupport/modules.py | 54 |
1 file changed, 46 insertions, 8 deletions
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index 032c0616ed..07c3078e45 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -15,12 +15,14 @@ # limitations under the License. # +from functools import total_ordering import itertools import re all_modules = [] +@total_ordering class Module(object): """ A module is the basic abstraction in our test runner script. Each module consists of a set of @@ -75,20 +77,56 @@ class Module(object): def contains_file(self, filename): return any(re.match(p, filename) for p in self.source_file_prefixes) + def __repr__(self): + return "Module<%s>" % self.name + + def __lt__(self, other): + return self.name < other.name + + def __eq__(self, other): + return self.name == other.name + + def __ne__(self, other): + return not (self.name == other.name) + + def __hash__(self): + return hash(self.name) + + +catalyst = Module( + name="catalyst", + dependencies=[], + source_file_regexes=[ + "sql/catalyst/", + ], + sbt_test_goals=[ + "catalyst/test", + ], +) + sql = Module( name="sql", - dependencies=[], + dependencies=[catalyst], source_file_regexes=[ - "sql/(?!hive-thriftserver)", + "sql/core/", + ], + sbt_test_goals=[ + "sql/test", + ], +) + +hive = Module( + name="hive", + dependencies=[sql], + source_file_regexes=[ + "sql/hive/", "bin/spark-sql", ], build_profile_flags=[ "-Phive", ], sbt_test_goals=[ - "catalyst/test", - "sql/test", "hive/test", ], test_tags=[ @@ -99,7 +137,7 @@ sql = Module( hive_thriftserver = Module( name="hive-thriftserver", - dependencies=[sql], + dependencies=[hive], source_file_regexes=[ "sql/hive-thriftserver", "sbin/start-thriftserver.sh", @@ -282,7 +320,7 @@ mllib = Module( examples = Module( name="examples", - dependencies=[graphx, mllib, streaming, sql], + dependencies=[graphx, mllib, streaming, hive], source_file_regexes=[ "examples/", ], @@ -314,7 +352,7 @@ pyspark_core = Module( pyspark_sql = Module( name="pyspark-sql", - 
dependencies=[pyspark_core, sql], + dependencies=[pyspark_core, hive], source_file_regexes=[ "python/pyspark/sql" ], @@ -404,7 +442,7 @@ pyspark_ml = Module( sparkr = Module( name="sparkr", - dependencies=[sql, mllib], + dependencies=[hive, mllib], source_file_regexes=[ "R/", ], |