From fa712b309c0e59943aae289dab629b34a13fe20e Mon Sep 17 00:00:00 2001
From: Venkata Ramana Gollamudi
Date: Fri, 31 Oct 2014 11:30:28 -0700
Subject: [SPARK-4077][SQL] Spark SQL returns wrong values for valid string timestamp values

In org.apache.hadoop.hive.serde2.io.TimestampWritable.set, if the next entry
is null, the current timestamp object is reset. Because of this,
HiveInspectors.unwrap cannot reuse the same timestamp object without creating
a copy.

Author: Venkata Ramana G
Author: Venkata Ramana Gollamudi

Closes #3019 from gvramana/spark_4077 and squashes the following commits:

32d818f [Venkata Ramana Gollamudi] fixed check style
fa01e71 [Venkata Ramana Gollamudi] cloned timestamp object as org.apache.hadoop.hive.serde2.io.TimestampWritable.set will reset current time object
---
 .../org/apache/spark/sql/hive/HiveInspectors.scala |  3 +++
 .../test/resources/data/files/issue-4077-data.txt  |  2 ++
 .../sql/hive/execution/HiveTableScanSuite.scala    | 22 ++++++++++++++++++++++
 3 files changed, 27 insertions(+)
 create mode 100644 sql/hive/src/test/resources/data/files/issue-4077-data.txt

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
index c6103a124d..0439ab97d8 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
@@ -91,6 +91,9 @@ private[hive] trait HiveInspectors {
       if (data == null) null else hvoi.getPrimitiveJavaObject(data).getValue
     case hdoi: HiveDecimalObjectInspector =>
       if (data == null) null else BigDecimal(hdoi.getPrimitiveJavaObject(data).bigDecimalValue())
+    // org.apache.hadoop.hive.serde2.io.TimestampWritable.set will reset current time object
+    // if next timestamp is null, so Timestamp object is cloned
+    case ti: TimestampObjectInspector => ti.getPrimitiveJavaObject(data).clone()
     case pi: PrimitiveObjectInspector => pi.getPrimitiveJavaObject(data)
     case li: ListObjectInspector =>
       Option(li.getList(data))
diff --git a/sql/hive/src/test/resources/data/files/issue-4077-data.txt b/sql/hive/src/test/resources/data/files/issue-4077-data.txt
new file mode 100644
index 0000000000..18067b0a64
--- /dev/null
+++ b/sql/hive/src/test/resources/data/files/issue-4077-data.txt
@@ -0,0 +1,2 @@
+2014-12-11 00:00:00,1
+2014-12-11astring00:00:00,2
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
index 2f3db95882..54c0f017d4 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
@@ -18,6 +18,9 @@
 package org.apache.spark.sql.hive.execution
 
 import org.apache.spark.sql.hive.test.TestHive
+import org.apache.spark.sql.{Row, SchemaRDD}
+
+import org.apache.spark.util.Utils
 
 class HiveTableScanSuite extends HiveComparisonTest {
 
@@ -47,4 +50,23 @@
     TestHive.sql("select KEY from tb where VALUE='just_for_test' limit 5").collect()
     TestHive.sql("drop table tb")
   }
+
+  test("Spark-4077: timestamp query for null value") {
+    TestHive.sql("DROP TABLE IF EXISTS timestamp_query_null")
+    TestHive.sql(
+      """
+        CREATE EXTERNAL TABLE timestamp_query_null (time TIMESTAMP,id INT)
+        ROW FORMAT DELIMITED
+        FIELDS TERMINATED BY ','
+        LINES TERMINATED BY '\n'
""".stripMargin) + val location = + Utils.getSparkClassLoader.getResource("data/files/issue-4077-data.txt").getFile() + + TestHive.sql(s"LOAD DATA LOCAL INPATH '$location' INTO TABLE timestamp_query_null") + assert(TestHive.sql("SELECT time from timestamp_query_null limit 2").collect() + === Array(Row(java.sql.Timestamp.valueOf("2014-12-11 00:00:00")),Row(null))) + TestHive.sql("DROP TABLE timestamp_query_null") + } + } -- cgit v1.2.3