aboutsummaryrefslogtreecommitdiff
path: root/sql/hive
diff options
context:
space:
mode:
author Venkata Ramana Gollamudi <ramana.gollamudi@huawei.com> 2014-10-31 11:30:28 -0700
committer Michael Armbrust <michael@databricks.com> 2014-10-31 11:30:28 -0700
commit fa712b309c0e59943aae289dab629b34a13fe20e (patch)
tree 5f96444a74af45854723031f64af456e72681ab4 /sql/hive
parent 7c41d135709c148d4fa3a1b06b5905715c970519 (diff)
download spark-fa712b309c0e59943aae289dab629b34a13fe20e.tar.gz
spark-fa712b309c0e59943aae289dab629b34a13fe20e.tar.bz2
spark-fa712b309c0e59943aae289dab629b34a13fe20e.zip
[SPARK-4077][SQL] Spark SQL returns wrong values for valid string timestamp values
In org.apache.hadoop.hive.serde2.io.TimestampWritable.set, if the next entry is null, the current timestamp object is reset. Because of this, HiveInspectors.unwrap cannot reuse the same timestamp object without first creating a copy of it.
Author: Venkata Ramana G <ramana.gollamudi@huawei.com>
Author: Venkata Ramana Gollamudi <ramana.gollamudi@huawei.com>
Closes #3019 from gvramana/spark_4077 and squashes the following commits:
32d818f [Venkata Ramana Gollamudi] fixed check style
fa01e71 [Venkata Ramana Gollamudi] cloned timestamp object as org.apache.hadoop.hive.serde2.io.TimestampWritable.set will reset current time object
Diffstat (limited to 'sql/hive')
-rw-r--r-- sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala 3
-rw-r--r-- sql/hive/src/test/resources/data/files/issue-4077-data.txt 2
-rw-r--r-- sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala 22
3 files changed, 27 insertions, 0 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
index c6103a124d..0439ab97d8 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
@@ -91,6 +91,9 @@ private[hive] trait HiveInspectors {
if (data == null) null else hvoi.getPrimitiveJavaObject(data).getValue
case hdoi: HiveDecimalObjectInspector =>
if (data == null) null else BigDecimal(hdoi.getPrimitiveJavaObject(data).bigDecimalValue())
+ // org.apache.hadoop.hive.serde2.io.TimestampWritable.set will reset current time object
+ // if next timestamp is null, so Timestamp object is cloned
+ case ti: TimestampObjectInspector => ti.getPrimitiveJavaObject(data).clone()
case pi: PrimitiveObjectInspector => pi.getPrimitiveJavaObject(data)
case li: ListObjectInspector =>
Option(li.getList(data))
diff --git a/sql/hive/src/test/resources/data/files/issue-4077-data.txt b/sql/hive/src/test/resources/data/files/issue-4077-data.txt
new file mode 100644
index 0000000000..18067b0a64
--- /dev/null
+++ b/sql/hive/src/test/resources/data/files/issue-4077-data.txt
@@ -0,0 +1,2 @@
+2014-12-11 00:00:00,1
+2014-12-11astring00:00:00,2
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
index 2f3db95882..54c0f017d4 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
@@ -18,6 +18,9 @@
package org.apache.spark.sql.hive.execution
import org.apache.spark.sql.hive.test.TestHive
+import org.apache.spark.sql.{Row, SchemaRDD}
+
+import org.apache.spark.util.Utils
class HiveTableScanSuite extends HiveComparisonTest {
@@ -47,4 +50,23 @@ class HiveTableScanSuite extends HiveComparisonTest {
TestHive.sql("select KEY from tb where VALUE='just_for_test' limit 5").collect()
TestHive.sql("drop table tb")
}
+
+ test("Spark-4077: timestamp query for null value") {
+ TestHive.sql("DROP TABLE IF EXISTS timestamp_query_null")
+ TestHive.sql(
+ """
+ CREATE EXTERNAL TABLE timestamp_query_null (time TIMESTAMP,id INT)
+ ROW FORMAT DELIMITED
+ FIELDS TERMINATED BY ','
+ LINES TERMINATED BY '\n'
+ """.stripMargin)
+ val location =
+ Utils.getSparkClassLoader.getResource("data/files/issue-4077-data.txt").getFile()
+
+ TestHive.sql(s"LOAD DATA LOCAL INPATH '$location' INTO TABLE timestamp_query_null")
+ assert(TestHive.sql("SELECT time from timestamp_query_null limit 2").collect()
+ === Array(Row(java.sql.Timestamp.valueOf("2014-12-11 00:00:00")),Row(null)))
+ TestHive.sql("DROP TABLE timestamp_query_null")
+ }
+
}