[SPARK-5941] [SQL] Unit Test loads the table `src` twice for leftsemijoin.q

`leftsemijoin.q` already contains a data loading command for the table `sales`, but `TestHive` also creates and loads the table `sales`, which causes duplicate records to be inserted into `sales`. Author: Cheng Hao <hao.cheng@intel.com> Closes #4506 from chenghao-intel/df_table and squashes the following commits: 0be05f7 [Cheng Hao] Remove the table `sales` creating from TestHive
author: Cheng Hao <hao.cheng@intel.com> 2015-04-13 16:02:18 -0700
committer: Michael Armbrust <michael@databricks.com> 2015-04-13 16:02:18 -0700
commit: c5602bdc310cc8f82dc304500bebe40217cba785 (patch)
tree: e8083a7a445cbc435ce390296ff9cd166e306801 /sql
parent: e63a86abe2794332cdad71d87b72a7c56327a43d (diff)
download: spark-c5602bdc310cc8f82dc304500bebe40217cba785.tar.gz
spark-c5602bdc310cc8f82dc304500bebe40217cba785.tar.bz2
spark-c5602bdc310cc8f82dc304500bebe40217cba785.zip
5 files changed, 12 insertions, 15 deletions
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala
index 27dfabca90..479210d1c9 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala
@@ -42,7 +42,7 @@ class InMemoryColumnarQuerySuite extends QueryTest {
       .toDF().registerTempTable("sizeTst")
     cacheTable("sizeTst")
     assert(
-      table("sizeTst").queryExecution.logical.statistics.sizeInBytes >
+      table("sizeTst").queryExecution.analyzed.statistics.sizeInBytes >
         conf.autoBroadcastJoinThreshold)
   }
 
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
index a3497eadd6..6570fa1043 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -262,12 +262,6 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) {
          |WITH SERDEPROPERTIES ('field.delim'='\\t')
        """.stripMargin.cmd,
       "INSERT OVERWRITE TABLE serdeins SELECT * FROM src".cmd),
-    TestTable("sales",
-      s"""CREATE TABLE IF NOT EXISTS sales (key STRING, value INT)
-         |ROW FORMAT SERDE '${classOf[RegexSerDe].getCanonicalName}'
-         |WITH SERDEPROPERTIES ("input.regex" = "([^ ]*)\t([^ ]*)")
-       """.stripMargin.cmd,
-      s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/sales.txt")}' INTO TABLE sales".cmd),
     TestTable("episodes",
       s"""CREATE TABLE episodes (title STRING, air_date STRING, doctor INT)
          |ROW FORMAT SERDE '${classOf[AvroSerDe].getCanonicalName}'
diff --git a/sql/hive/src/test/resources/golden/leftsemijoin-10-89737a8857b5b61cc909e0c797f86aea b/sql/hive/src/test/resources/golden/leftsemijoin-10-89737a8857b5b61cc909e0c797f86aea
index 25ce912507..a1963ba81e 100644
--- a/sql/hive/src/test/resources/golden/leftsemijoin-10-89737a8857b5b61cc909e0c797f86aea
+++ b/sql/hive/src/test/resources/golden/leftsemijoin-10-89737a8857b5b61cc909e0c797f86aea
@@ -1,4 +1,2 @@
 Hank	2
-Hank	2
-Joe	2
 Joe	2
diff --git a/sql/hive/src/test/resources/golden/leftsemijoin-8-73cad58a10a1483ccb15e94a857013 b/sql/hive/src/test/resources/golden/leftsemijoin-8-73cad58a10a1483ccb15e94a857013
index 25ce912507..a1963ba81e 100644
--- a/sql/hive/src/test/resources/golden/leftsemijoin-8-73cad58a10a1483ccb15e94a857013
+++ b/sql/hive/src/test/resources/golden/leftsemijoin-8-73cad58a10a1483ccb15e94a857013
@@ -1,4 +1,2 @@
 Hank	2
-Hank	2
-Joe	2
 Joe	2
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala
index d05e11fcf2..5586a79361 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala
@@ -25,18 +25,25 @@ import org.apache.spark.sql.hive.test.TestHive
  * A set of tests that validates support for Hive SerDe.
  */
 class HiveSerDeSuite extends HiveComparisonTest with BeforeAndAfterAll {
-
   override def beforeAll(): Unit = {
+    import TestHive._
+    import org.apache.hadoop.hive.serde2.RegexSerDe
+      super.beforeAll()
     TestHive.cacheTables = false
-    super.beforeAll()
+    sql(s"""CREATE TABLE IF NOT EXISTS sales (key STRING, value INT)
+       |ROW FORMAT SERDE '${classOf[RegexSerDe].getCanonicalName}'
+       |WITH SERDEPROPERTIES ("input.regex" = "([^ ]*)\t([^ ]*)")
+       """.stripMargin)
+    sql(s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/sales.txt")}' INTO TABLE sales")
   }
 
+  // table sales is not a cache table, and will be cleared after reset
+  createQueryTest("Read with RegexSerDe", "SELECT * FROM sales", false)
+
   createQueryTest(
     "Read and write with LazySimpleSerDe (tab separated)",
     "SELECT * from serdeins")
 
-  createQueryTest("Read with RegexSerDe", "SELECT * FROM sales")
-
   createQueryTest("Read with AvroSerDe", "SELECT * FROM episodes")
 
   createQueryTest("Read Partitioned with AvroSerDe", "SELECT * FROM episodes_part")
author	Cheng Hao <hao.cheng@intel.com>	2015-04-13 16:02:18 -0700
committer	Michael Armbrust <michael@databricks.com>	2015-04-13 16:02:18 -0700
commit	c5602bdc310cc8f82dc304500bebe40217cba785 (patch)
tree	e8083a7a445cbc435ce390296ff9cd166e306801 /sql
parent	e63a86abe2794332cdad71d87b72a7c56327a43d (diff)
download	spark-c5602bdc310cc8f82dc304500bebe40217cba785.tar.gz spark-c5602bdc310cc8f82dc304500bebe40217cba785.tar.bz2 spark-c5602bdc310cc8f82dc304500bebe40217cba785.zip