aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
author: Cheng Hao <hao.cheng@intel.com> 2015-04-13 16:02:18 -0700
committer: Michael Armbrust <michael@databricks.com> 2015-04-13 16:02:18 -0700
commit: c5602bdc310cc8f82dc304500bebe40217cba785 (patch)
tree: e8083a7a445cbc435ce390296ff9cd166e306801 /sql
parent: e63a86abe2794332cdad71d87b72a7c56327a43d (diff)
download: spark-c5602bdc310cc8f82dc304500bebe40217cba785.tar.gz
spark-c5602bdc310cc8f82dc304500bebe40217cba785.tar.bz2
spark-c5602bdc310cc8f82dc304500bebe40217cba785.zip
[SPARK-5941] [SQL] Unit Test loads the table `src` twice for leftsemijoin.q
In `leftsemijoin.q`, there is already a data loading command for the table `sales`, but `TestHive` also creates and loads the table `sales`, which causes duplicate records to be inserted into `sales`.
Author: Cheng Hao <hao.cheng@intel.com>
Closes #4506 from chenghao-intel/df_table and squashes the following commits:
0be05f7 [Cheng Hao] Remove the table `sales` creating from TestHive
Diffstat (limited to 'sql')
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala 2
-rw-r--r-- sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala 6
-rw-r--r-- sql/hive/src/test/resources/golden/leftsemijoin-10-89737a8857b5b61cc909e0c797f86aea 2
-rw-r--r-- sql/hive/src/test/resources/golden/leftsemijoin-8-73cad58a10a1483ccb15e94a857013 2
-rw-r--r-- sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala 15
5 files changed, 12 insertions, 15 deletions
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala
index 27dfabca90..479210d1c9 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala
@@ -42,7 +42,7 @@ class InMemoryColumnarQuerySuite extends QueryTest {
.toDF().registerTempTable("sizeTst")
cacheTable("sizeTst")
assert(
- table("sizeTst").queryExecution.logical.statistics.sizeInBytes >
+ table("sizeTst").queryExecution.analyzed.statistics.sizeInBytes >
conf.autoBroadcastJoinThreshold)
}
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
index a3497eadd6..6570fa1043 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -262,12 +262,6 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) {
|WITH SERDEPROPERTIES ('field.delim'='\\t')
""".stripMargin.cmd,
"INSERT OVERWRITE TABLE serdeins SELECT * FROM src".cmd),
- TestTable("sales",
- s"""CREATE TABLE IF NOT EXISTS sales (key STRING, value INT)
- |ROW FORMAT SERDE '${classOf[RegexSerDe].getCanonicalName}'
- |WITH SERDEPROPERTIES ("input.regex" = "([^ ]*)\t([^ ]*)")
- """.stripMargin.cmd,
- s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/sales.txt")}' INTO TABLE sales".cmd),
TestTable("episodes",
s"""CREATE TABLE episodes (title STRING, air_date STRING, doctor INT)
|ROW FORMAT SERDE '${classOf[AvroSerDe].getCanonicalName}'
diff --git a/sql/hive/src/test/resources/golden/leftsemijoin-10-89737a8857b5b61cc909e0c797f86aea b/sql/hive/src/test/resources/golden/leftsemijoin-10-89737a8857b5b61cc909e0c797f86aea
index 25ce912507..a1963ba81e 100644
--- a/sql/hive/src/test/resources/golden/leftsemijoin-10-89737a8857b5b61cc909e0c797f86aea
+++ b/sql/hive/src/test/resources/golden/leftsemijoin-10-89737a8857b5b61cc909e0c797f86aea
@@ -1,4 +1,2 @@
Hank 2
-Hank 2
-Joe 2
Joe 2
diff --git a/sql/hive/src/test/resources/golden/leftsemijoin-8-73cad58a10a1483ccb15e94a857013 b/sql/hive/src/test/resources/golden/leftsemijoin-8-73cad58a10a1483ccb15e94a857013
index 25ce912507..a1963ba81e 100644
--- a/sql/hive/src/test/resources/golden/leftsemijoin-8-73cad58a10a1483ccb15e94a857013
+++ b/sql/hive/src/test/resources/golden/leftsemijoin-8-73cad58a10a1483ccb15e94a857013
@@ -1,4 +1,2 @@
Hank 2
-Hank 2
-Joe 2
Joe 2
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala
index d05e11fcf2..5586a79361 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala
@@ -25,18 +25,25 @@ import org.apache.spark.sql.hive.test.TestHive
* A set of tests that validates support for Hive SerDe.
*/
class HiveSerDeSuite extends HiveComparisonTest with BeforeAndAfterAll {
-
override def beforeAll(): Unit = {
+ import TestHive._
+ import org.apache.hadoop.hive.serde2.RegexSerDe
+ super.beforeAll()
TestHive.cacheTables = false
- super.beforeAll()
+ sql(s"""CREATE TABLE IF NOT EXISTS sales (key STRING, value INT)
+ |ROW FORMAT SERDE '${classOf[RegexSerDe].getCanonicalName}'
+ |WITH SERDEPROPERTIES ("input.regex" = "([^ ]*)\t([^ ]*)")
+ """.stripMargin)
+ sql(s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/sales.txt")}' INTO TABLE sales")
}
+ // table sales is not a cache table, and will be clear after reset
+ createQueryTest("Read with RegexSerDe", "SELECT * FROM sales", false)
+
createQueryTest(
"Read and write with LazySimpleSerDe (tab separated)",
"SELECT * from serdeins")
- createQueryTest("Read with RegexSerDe", "SELECT * FROM sales")
-
createQueryTest("Read with AvroSerDe", "SELECT * FROM episodes")
createQueryTest("Read Partitioned with AvroSerDe", "SELECT * FROM episodes_part")