author    Josh Rosen <joshrosen@databricks.com>    2016-01-26 14:20:11 -0800
committer Josh Rosen <joshrosen@databricks.com>    2016-01-26 14:20:11 -0800
commit    ee74498de372b16fe6350e3617e9e6ec87c6ae7b (patch)
tree      0adf34b8e4c9421d79b04988b4c39e8715e6a5f6 /dev/sparktestsupport/modules.py
parent    fbf7623d49525e3aa6b08f482afd7ee8118d80cb (diff)
[SPARK-8725][PROJECT-INFRA] Test modules in topologically-sorted order in dev/run-tests
This patch improves our `dev/run-tests` script to test modules in a topologically-sorted order based on modules' dependencies. This will help to ensure that bugs in upstream projects are not misattributed to downstream projects because those projects' tests were the first ones to exhibit the failure. Topological sorting is also useful for shortening the feedback loop when testing pull requests: if I make a change in SQL then the SQL tests should run before MLlib, not after.

In addition, this patch also updates our test module definitions to split `sql` into `catalyst`, `sql`, and `hive` in order to allow more tests to be skipped when changing only `hive/` files.

Author: Josh Rosen <joshrosen@databricks.com>

Closes #10885 from JoshRosen/SPARK-8725.
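For illustration, a minimal sketch of the idea (not necessarily how dev/run-tests itself implements it): Module objects like the ones defined in this file carry a `dependencies` list, so a depth-first visit yields a dependency-first order, and the name-based ordering this patch adds (via `@total_ordering`) keeps that order deterministic across runs:

    def topological_sort(modules):
        # Return modules in dependency order (upstream modules first).
        # Assumes the dependency graph is acyclic, which holds for the
        # Module definitions in modules.py by construction.
        ordered = []
        seen = set()

        def visit(module):
            if module.name in seen:
                return
            seen.add(module.name)
            for dep in sorted(module.dependencies):  # __lt__ from @total_ordering
                visit(dep)
            ordered.append(module)

        for module in sorted(modules):  # sort roots for a stable output
            visit(module)
        return ordered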
Diffstat (limited to 'dev/sparktestsupport/modules.py')
-rw-r--r--    dev/sparktestsupport/modules.py    54
1 file changed, 46 insertions(+), 8 deletions(-)
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index 032c0616ed..07c3078e45 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -15,12 +15,14 @@
# limitations under the License.
#
+from functools import total_ordering
import itertools
import re
all_modules = []
+@total_ordering
class Module(object):
"""
A module is the basic abstraction in our test runner script. Each module consists of a set of
@@ -75,20 +77,56 @@ class Module(object):
def contains_file(self, filename):
return any(re.match(p, filename) for p in self.source_file_prefixes)
+ def __repr__(self):
+ return "Module<%s>" % self.name
+
+ def __lt__(self, other):
+ return self.name < other.name
+
+ def __eq__(self, other):
+ return self.name == other.name
+
+ def __ne__(self, other):
+ return not (self.name == other.name)
+
+ def __hash__(self):
+ return hash(self.name)
+
+
+catalyst = Module(
+ name="catalyst",
+ dependencies=[],
+ source_file_regexes=[
+ "sql/catalyst/",
+ ],
+ sbt_test_goals=[
+ "catalyst/test",
+ ],
+)
+
sql = Module(
name="sql",
- dependencies=[],
+ dependencies=[catalyst],
source_file_regexes=[
- "sql/(?!hive-thriftserver)",
+ "sql/core/",
+ ],
+ sbt_test_goals=[
+ "sql/test",
+ ],
+)
+
+hive = Module(
+ name="hive",
+ dependencies=[sql],
+ source_file_regexes=[
+ "sql/hive/",
"bin/spark-sql",
],
build_profile_flags=[
"-Phive",
],
sbt_test_goals=[
- "catalyst/test",
- "sql/test",
"hive/test",
],
test_tags=[
@@ -99,7 +137,7 @@ sql = Module(
hive_thriftserver = Module(
name="hive-thriftserver",
- dependencies=[sql],
+ dependencies=[hive],
source_file_regexes=[
"sql/hive-thriftserver",
"sbin/start-thriftserver.sh",
@@ -282,7 +320,7 @@ mllib = Module(
examples = Module(
name="examples",
- dependencies=[graphx, mllib, streaming, sql],
+ dependencies=[graphx, mllib, streaming, hive],
source_file_regexes=[
"examples/",
],
@@ -314,7 +352,7 @@ pyspark_core = Module(
pyspark_sql = Module(
name="pyspark-sql",
- dependencies=[pyspark_core, sql],
+ dependencies=[pyspark_core, hive],
source_file_regexes=[
"python/pyspark/sql"
],
@@ -404,7 +442,7 @@ pyspark_ml = Module(
sparkr = Module(
name="sparkr",
- dependencies=[sql, mllib],
+ dependencies=[hive, mllib],
source_file_regexes=[
"R/",
],
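A short usage note (illustrative, not part of the patch): `@total_ordering` derives the remaining rich comparisons from the `__lt__` and `__eq__` defined above, so Module objects sort lexicographically by name and, thanks to `__hash__`, deduplicate in sets:

    modules = [hive, sql, catalyst]
    print(sorted(modules))        # [Module<catalyst>, Module<hive>, Module<sql>]
    print(len({sql, sql, hive}))  # 2 -- __hash__/__eq__ make Modules set-friendly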