diff options
author | Josh Rosen <joshrosen@databricks.com> | 2016-01-26 14:20:11 -0800 |
---|---|---|
committer | Josh Rosen <joshrosen@databricks.com> | 2016-01-26 14:20:11 -0800 |
commit | ee74498de372b16fe6350e3617e9e6ec87c6ae7b (patch) | |
tree | 0adf34b8e4c9421d79b04988b4c39e8715e6a5f6 /dev/sparktestsupport/modules.py | |
parent | fbf7623d49525e3aa6b08f482afd7ee8118d80cb (diff) | |
download | spark-ee74498de372b16fe6350e3617e9e6ec87c6ae7b.tar.gz spark-ee74498de372b16fe6350e3617e9e6ec87c6ae7b.tar.bz2 spark-ee74498de372b16fe6350e3617e9e6ec87c6ae7b.zip |
[SPARK-8725][PROJECT-INFRA] Test modules in topologically-sorted order in dev/run-tests
This patch improves our `dev/run-tests` script to test modules in a topologically-sorted order based on modules' dependencies. This will help to ensure that bugs in upstream projects are not misattributed to downstream projects because those projects' tests were the first ones to exhibit the failure.
Topological sorting is also useful for shortening the feedback loop when testing pull requests: if I make a change in SQL then the SQL tests should run before MLlib, not after.
In addition, this patch also updates our test module definitions to split `sql` into `catalyst`, `sql`, and `hive` in order to allow more tests to be skipped when changing only `hive/` files.
Author: Josh Rosen <joshrosen@databricks.com>
Closes #10885 from JoshRosen/SPARK-8725.
Diffstat (limited to 'dev/sparktestsupport/modules.py')
-rw-r--r-- | dev/sparktestsupport/modules.py | 54 |
1 file changed, 46 insertions, 8 deletions
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index 032c0616ed..07c3078e45 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -15,12 +15,14 @@ # limitations under the License. # +from functools import total_ordering import itertools import re all_modules = [] +@total_ordering class Module(object): """ A module is the basic abstraction in our test runner script. Each module consists of a set of @@ -75,20 +77,56 @@ class Module(object): def contains_file(self, filename): return any(re.match(p, filename) for p in self.source_file_prefixes) + def __repr__(self): + return "Module<%s>" % self.name + + def __lt__(self, other): + return self.name < other.name + + def __eq__(self, other): + return self.name == other.name + + def __ne__(self, other): + return not (self.name == other.name) + + def __hash__(self): + return hash(self.name) + + +catalyst = Module( + name="catalyst", + dependencies=[], + source_file_regexes=[ + "sql/catalyst/", + ], + sbt_test_goals=[ + "catalyst/test", + ], +) + sql = Module( name="sql", - dependencies=[], + dependencies=[catalyst], source_file_regexes=[ - "sql/(?!hive-thriftserver)", + "sql/core/", + ], + sbt_test_goals=[ + "sql/test", + ], +) + +hive = Module( + name="hive", + dependencies=[sql], + source_file_regexes=[ + "sql/hive/", "bin/spark-sql", ], build_profile_flags=[ "-Phive", ], sbt_test_goals=[ - "catalyst/test", - "sql/test", "hive/test", ], test_tags=[ @@ -99,7 +137,7 @@ sql = Module( hive_thriftserver = Module( name="hive-thriftserver", - dependencies=[sql], + dependencies=[hive], source_file_regexes=[ "sql/hive-thriftserver", "sbin/start-thriftserver.sh", @@ -282,7 +320,7 @@ mllib = Module( examples = Module( name="examples", - dependencies=[graphx, mllib, streaming, sql], + dependencies=[graphx, mllib, streaming, hive], source_file_regexes=[ "examples/", ], @@ -314,7 +352,7 @@ pyspark_core = Module( pyspark_sql = Module( name="pyspark-sql", - 
dependencies=[pyspark_core, sql], + dependencies=[pyspark_core, hive], source_file_regexes=[ "python/pyspark/sql" ], @@ -404,7 +442,7 @@ pyspark_ml = Module( sparkr = Module( name="sparkr", - dependencies=[sql, mllib], + dependencies=[hive, mllib], source_file_regexes=[ "R/", ], |