From 24f338ba7b34df493dd49bbc354d08f5e3afbb85 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Wed, 20 Apr 2016 17:56:31 -0700 Subject: [SPARK-14775][SQL] Remove TestHiveSparkSession.rewritePaths ## What changes were proposed in this pull request? The path rewrite in TestHiveSparkSession is pretty hacky. I think we can remove that complexity and just do a string replacement when we read the query files in. This would remove the override of runNativeSql in TestHive, which will simplify the removal of Hive-specific variable substitution. ## How was this patch tested? This is a small test refactoring to simplify test infrastructure. Author: Reynold Xin Closes #12543 from rxin/SPARK-14775. --- .../org/apache/spark/sql/hive/test/TestHive.scala | 18 ------------------ sql/hive/src/test/resources/hive-test-path-helper.txt | 1 + .../spark/sql/hive/execution/HiveComparisonTest.scala | 14 +++++++++++++- .../spark/sql/hive/execution/HiveQueryFileTest.scala | 6 +++--- 4 files changed, 17 insertions(+), 22 deletions(-) create mode 100644 sql/hive/src/test/resources/hive-test-path-helper.txt (limited to 'sql') diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala index e629099086..2bb13996c1 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -182,19 +182,6 @@ private[hive] class TestHiveSparkSession( Option(System.getenv(envVar)).map(new File(_)) } - /** - * Replaces relative paths to the parent directory "../" with hiveDevHome since this is how the - * hive test cases assume the system is set up. 
- */ - private[hive] def rewritePaths(cmd: String): String = - if (cmd.toUpperCase contains "LOAD DATA") { - val testDataLocation = - hiveDevHome.map(_.getCanonicalPath).getOrElse(inRepoTests.getCanonicalPath) - cmd.replaceAll("\\.\\./\\.\\./", testDataLocation + "/") - } else { - cmd - } - val hiveFilesTemp = File.createTempFile("catalystHiveFiles", "") hiveFilesTemp.delete() hiveFilesTemp.mkdir() @@ -566,11 +553,6 @@ private[hive] class TestHiveSessionState(sparkSession: TestHiveSparkSession) override def executePlan(plan: LogicalPlan): TestHiveQueryExecution = { new TestHiveQueryExecution(sparkSession, plan) } - - // Override so we can intercept relative paths and rewrite them to point at hive. - override def runNativeSql(sql: String): Seq[String] = { - super.runNativeSql(sparkSession.rewritePaths(substitutor.substitute(hiveconf, sql))) - } } diff --git a/sql/hive/src/test/resources/hive-test-path-helper.txt b/sql/hive/src/test/resources/hive-test-path-helper.txt new file mode 100644 index 0000000000..356b131ea1 --- /dev/null +++ b/sql/hive/src/test/resources/hive-test-path-helper.txt @@ -0,0 +1 @@ +This file is here so we can match on it and find the path to the current folder. diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala index bd46cb922e..994dc4a2d2 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala @@ -47,6 +47,17 @@ import org.apache.spark.sql.hive.test.{TestHive, TestHiveQueryExecution} abstract class HiveComparisonTest extends SparkFunSuite with BeforeAndAfterAll with GivenWhenThen { + /** + * Path to the test datasets. We find this by looking up "hive-test-path-helper.txt" file. + * + * Before we run the query in Spark, we replace "../../data" with this path. 
+ */ + private val testDataPath: String = { + Thread.currentThread.getContextClassLoader + .getResource("hive-test-path-helper.txt") + .getPath.replace("/hive-test-path-helper.txt", "/data") + } + /** * When set, any cache files that result in test failures will be deleted. Used when the test * harness or hive have been updated thus requiring new golden answers to be computed for some @@ -386,7 +397,8 @@ abstract class HiveComparisonTest var query: TestHiveQueryExecution = null try { query = { - val originalQuery = new TestHiveQueryExecution(queryString) + val originalQuery = new TestHiveQueryExecution( + queryString.replace("../../data", testDataPath)) val containsCommands = originalQuery.analyzed.collectFirst { case _: Command => () case _: LogicalInsertIntoHiveTable => () diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQueryFileTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQueryFileTest.scala index f96c989c46..e772324a57 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQueryFileTest.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQueryFileTest.scala @@ -40,14 +40,14 @@ abstract class HiveQueryFileTest extends HiveComparisonTest { def testCases: Seq[(String, File)] - val runAll = + val runAll: Boolean = !(System.getProperty("spark.hive.alltests") == null) || runOnlyDirectories.nonEmpty || skipDirectories.nonEmpty - val whiteListProperty = "spark.hive.whitelist" + val whiteListProperty: String = "spark.hive.whitelist" // Allow the whiteList to be overridden by a system property - val realWhiteList = + val realWhiteList: Seq[String] = Option(System.getProperty(whiteListProperty)).map(_.split(",").toSeq).getOrElse(whiteList) // Go through all the test cases and add them to scala test. -- cgit v1.2.3