aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorMichael Armbrust <michael@databricks.com>2014-06-19 14:14:03 -0700
committerReynold Xin <rxin@apache.org>2014-06-19 14:14:03 -0700
commit777c5958c4088182f9e2daba435ccb413a2f69d7 (patch)
tree40bb56c41a92f321e53225aa3fafb059b7d02aa4 /sql
parentbce0897bc6b0fc9bca5444dbe3a9e75523ad7481 (diff)
downloadspark-777c5958c4088182f9e2daba435ccb413a2f69d7.tar.gz
spark-777c5958c4088182f9e2daba435ccb413a2f69d7.tar.bz2
spark-777c5958c4088182f9e2daba435ccb413a2f69d7.zip
[SPARK-2191][SQL] Make sure InsertIntoHiveTable doesn't execute more than once.
Author: Michael Armbrust <michael@databricks.com> Closes #1129 from marmbrus/doubleCreateAs and squashes the following commits: 9c6d9e4 [Michael Armbrust] Fix typo. 5128fe2 [Michael Armbrust] Make sure InsertIntoHiveTable doesn't execute each time you ask for its result.
Diffstat (limited to 'sql')
-rw-r--r--sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala6
-rw-r--r--sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala6
2 files changed, 11 insertions, 1 deletion
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala
index a839231449..240aa0df49 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/hiveOperators.scala
@@ -344,12 +344,16 @@ case class InsertIntoHiveTable(
writer.commitJob()
}
+ override def execute() = result
+
/**
* Inserts all the rows in the table into Hive. Row objects are properly serialized with the
* `org.apache.hadoop.hive.serde2.SerDe` and the
* `org.apache.hadoop.mapred.OutputFormat` provided by the table definition.
+ *
+ * Note: this is run once and then kept to avoid double insertions.
*/
- def execute() = {
+ private lazy val result: RDD[Row] = {
val childRdd = child.execute()
assert(childRdd != null)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 8b2bdd513b..5118f4b3f9 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -28,6 +28,12 @@ import org.apache.spark.sql.{SchemaRDD, execution, Row}
*/
class HiveQuerySuite extends HiveComparisonTest {
+ test("CREATE TABLE AS runs once") {
+ hql("CREATE TABLE foo AS SELECT 1 FROM src LIMIT 1").collect()
+ assert(hql("SELECT COUNT(*) FROM foo").collect().head.getLong(0) === 1,
+ "Incorrect number of rows in created table")
+ }
+
createQueryTest("between",
"SELECT * FROM src WHERE key Between 1 and 2")