aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorMichael Armbrust <michael@databricks.com>2015-08-24 23:15:27 -0700
committerReynold Xin <rxin@databricks.com>2015-08-24 23:15:27 -0700
commit5175ca0c85b10045d12c3fb57b1e52278a413ecf (patch)
tree8389dcb2fff2c391904c7fa869b649fd411d25c3 /sql
parenta0c0aae1defe5e1e57704065631d201f8e3f6bac (diff)
downloadspark-5175ca0c85b10045d12c3fb57b1e52278a413ecf.tar.gz
spark-5175ca0c85b10045d12c3fb57b1e52278a413ecf.tar.bz2
spark-5175ca0c85b10045d12c3fb57b1e52278a413ecf.zip
[SPARK-10178] [SQL] HiveComparisonTest should print out dependent tables
In `HiveComparisonTest`s it is possible for a query of the form `SELECT * FROM dest1` to fail, where `dest1` is a table populated by an earlier query that is actually computing the incorrect results. To aid debugging, this patch improves the harness to also print the query plans and results of the queries that generated those tables. Author: Michael Armbrust <michael@databricks.com> Closes #8388 from marmbrus/generatedTables.
Diffstat (limited to 'sql')
-rw-r--r--sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala36
1 files changed, 36 insertions, 0 deletions
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
index 2bdb0e1187..4d45249d9c 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
@@ -19,6 +19,8 @@ package org.apache.spark.sql.hive.execution
import java.io._
+import scala.util.control.NonFatal
+
import org.scalatest.{BeforeAndAfterAll, GivenWhenThen}
import org.apache.spark.{Logging, SparkFunSuite}
@@ -386,11 +388,45 @@ abstract class HiveComparisonTest
hiveCacheFiles.foreach(_.delete())
}
+ // If this query reads other tables that were created during this test run,
+ // also print out the query plans and results of the queries that generated them.
+ val computedTablesMessages: String = try {
+ val tablesRead = new TestHive.QueryExecution(query).executedPlan.collect {
+ case ts: HiveTableScan => ts.relation.tableName
+ }.toSet
+
+ TestHive.reset()
+ val executions = queryList.map(new TestHive.QueryExecution(_))
+ executions.foreach(_.toRdd)
+ val tablesGenerated = queryList.zip(executions).flatMap {
+ case (q, e) => e.executedPlan.collect {
+ case i: InsertIntoHiveTable if tablesRead contains i.table.tableName =>
+ (q, e, i)
+ }
+ }
+
+ tablesGenerated.map { case (hiveql, execution, insert) =>
+ s"""
+ |=== Generated Table ===
+ |$hiveql
+ |$execution
+ |== Results ==
+ |${insert.child.execute().collect().mkString("\n")}
+ """.stripMargin
+ }.mkString("\n")
+
+ } catch {
+ case NonFatal(e) =>
+ logError("Failed to compute generated tables", e)
+ s"Couldn't compute dependent tables: $e"
+ }
+
val errorMessage =
s"""
|Results do not match for $testCaseName:
|$hiveQuery\n${hiveQuery.analyzed.output.map(_.name).mkString("\t")}
|$resultComparison
+ |$computedTablesMessages
""".stripMargin
stringToFile(new File(wrongDirectory, testCaseName), errorMessage + consoleTestCase)