diff options
author | Michael Armbrust <michael@databricks.com> | 2015-08-24 23:15:27 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2015-08-24 23:15:27 -0700 |
commit | 5175ca0c85b10045d12c3fb57b1e52278a413ecf (patch) | |
tree | 8389dcb2fff2c391904c7fa869b649fd411d25c3 | |
parent | a0c0aae1defe5e1e57704065631d201f8e3f6bac (diff) | |
download | spark-5175ca0c85b10045d12c3fb57b1e52278a413ecf.tar.gz spark-5175ca0c85b10045d12c3fb57b1e52278a413ecf.tar.bz2 spark-5175ca0c85b10045d12c3fb57b1e52278a413ecf.zip |
[SPARK-10178] [SQL] HiveComparisionTest should print out dependent tables
In `HiveComparisionTest`s it is possible to fail a query of the form `SELECT * FROM dest1`, where `dest1` is the query that is actually computing the incorrect results. To aid debugging this patch improves the harness to also print these query plans and their results.
Author: Michael Armbrust <michael@databricks.com>
Closes #8388 from marmbrus/generatedTables.
-rw-r--r-- | sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala | 36 |
1 files changed, 36 insertions, 0 deletions
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala index 2bdb0e1187..4d45249d9c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.hive.execution import java.io._ +import scala.util.control.NonFatal + import org.scalatest.{BeforeAndAfterAll, GivenWhenThen} import org.apache.spark.{Logging, SparkFunSuite} @@ -386,11 +388,45 @@ abstract class HiveComparisonTest hiveCacheFiles.foreach(_.delete()) } + // If this query is reading other tables that were created during this test run + // also print out the query plans and results for those. + val computedTablesMessages: String = try { + val tablesRead = new TestHive.QueryExecution(query).executedPlan.collect { + case ts: HiveTableScan => ts.relation.tableName + }.toSet + + TestHive.reset() + val executions = queryList.map(new TestHive.QueryExecution(_)) + executions.foreach(_.toRdd) + val tablesGenerated = queryList.zip(executions).flatMap { + case (q, e) => e.executedPlan.collect { + case i: InsertIntoHiveTable if tablesRead contains i.table.tableName => + (q, e, i) + } + } + + tablesGenerated.map { case (hiveql, execution, insert) => + s""" + |=== Generated Table === + |$hiveql + |$execution + |== Results == + |${insert.child.execute().collect().mkString("\n")} + """.stripMargin + }.mkString("\n") + + } catch { + case NonFatal(e) => + logError("Failed to compute generated tables", e) + s"Couldn't compute dependent tables: $e" + } + val errorMessage = s""" |Results do not match for $testCaseName: |$hiveQuery\n${hiveQuery.analyzed.output.map(_.name).mkString("\t")} |$resultComparison + |$computedTablesMessages """.stripMargin stringToFile(new File(wrongDirectory, testCaseName), errorMessage + consoleTestCase) |