aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
author: Daoyuan Wang <daoyuan.wang@intel.com> 2014-09-23 11:45:44 -0700
committer: Michael Armbrust <michael@databricks.com> 2014-09-23 11:45:44 -0700
commit: 66bc0f2d675d06cdd48638f124a1ff32be2bf456 (patch)
tree: f003e968229e85d23b8c8a15acd222db0f740e2b /sql
parent: 11c10df825419372df61a8d23c51e8c3cc78047f (diff)
download: spark-66bc0f2d675d06cdd48638f124a1ff32be2bf456.tar.gz
spark-66bc0f2d675d06cdd48638f124a1ff32be2bf456.tar.bz2
spark-66bc0f2d675d06cdd48638f124a1ff32be2bf456.zip
[SPARK-3598][SQL]cast to timestamp should be the same as hive
This patch fixes two problems with casting to timestamp: timestamps smaller than 0, and casting an int to a timestamp.

`select cast(1000 as timestamp) from src limit 1;` should return 1970-01-01 00:00:01 (the integer is interpreted as milliseconds, as in Hive), but we currently treat it as 1000 seconds. Also, the current implementation has a bug when the time is before 1970-01-01 00:00:00.

rxin marmbrus chenghao-intel

Author: Daoyuan Wang <daoyuan.wang@intel.com>

Closes #2458 from adrian-wang/timestamp and squashes the following commits:
4274b1d [Daoyuan Wang] set test not related to timezone
1234f66 [Daoyuan Wang] fix timestamp smaller than 0 and cast int as timestamp
Diffstat (limited to 'sql')
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala | 17
-rw-r--r-- sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala | 16
-rw-r--r-- sql/hive/src/test/resources/golden/timestamp cast #1-0-69fc614ccea92bbe39f4decc299edcc6 | 1
-rw-r--r-- sql/hive/src/test/resources/golden/timestamp cast #2-0-732ed232ac592c5e7f7c913a88874fd2 | 1
-rw-r--r-- sql/hive/src/test/resources/golden/timestamp cast #3-0-76ee270337f664b36cacfc6528ac109 | 1
-rw-r--r-- sql/hive/src/test/resources/golden/timestamp cast #4-0-732ed232ac592c5e7f7c913a88874fd2 | 1
-rw-r--r-- sql/hive/src/test/resources/golden/timestamp cast #5-0-dbd7bcd167d322d6617b884c02c7f247 | 1
-rw-r--r-- sql/hive/src/test/resources/golden/timestamp cast #6-0-6d2da5cfada03605834e38bc4075bc79 | 1
-rw-r--r-- sql/hive/src/test/resources/golden/timestamp cast #7-0-1d70654217035f8ce5f64344f4c5a80f | 1
-rw-r--r-- sql/hive/src/test/resources/golden/timestamp cast #8-0-6d2da5cfada03605834e38bc4075bc79 | 1
-rw-r--r-- sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala | 24
11 files changed, 50 insertions, 15 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index 0379275121..f626d09f03 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -86,15 +86,15 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression {
try Timestamp.valueOf(n) catch { case _: java.lang.IllegalArgumentException => null }
})
case BooleanType =>
- buildCast[Boolean](_, b => new Timestamp((if (b) 1 else 0) * 1000))
+ buildCast[Boolean](_, b => new Timestamp((if (b) 1 else 0)))
case LongType =>
- buildCast[Long](_, l => new Timestamp(l * 1000))
+ buildCast[Long](_, l => new Timestamp(l))
case IntegerType =>
- buildCast[Int](_, i => new Timestamp(i * 1000))
+ buildCast[Int](_, i => new Timestamp(i))
case ShortType =>
- buildCast[Short](_, s => new Timestamp(s * 1000))
+ buildCast[Short](_, s => new Timestamp(s))
case ByteType =>
- buildCast[Byte](_, b => new Timestamp(b * 1000))
+ buildCast[Byte](_, b => new Timestamp(b))
// TimestampWritable.decimalToTimestamp
case DecimalType =>
buildCast[BigDecimal](_, d => decimalToTimestamp(d))
@@ -107,11 +107,10 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression {
}
private[this] def decimalToTimestamp(d: BigDecimal) = {
- val seconds = d.longValue()
+ val seconds = Math.floor(d.toDouble).toLong
val bd = (d - seconds) * 1000000000
val nanos = bd.intValue()
- // Convert to millis
val millis = seconds * 1000
val t = new Timestamp(millis)
@@ -121,11 +120,11 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression {
}
// Timestamp to long, converting milliseconds to seconds
- private[this] def timestampToLong(ts: Timestamp) = ts.getTime / 1000
+ private[this] def timestampToLong(ts: Timestamp) = Math.floor(ts.getTime / 1000.0).toLong
private[this] def timestampToDouble(ts: Timestamp) = {
// First part is the seconds since the beginning of time, followed by nanosecs.
- ts.getTime / 1000 + ts.getNanos.toDouble / 1000000000
+ Math.floor(ts.getTime / 1000.0).toLong + ts.getNanos.toDouble / 1000000000
}
// Converts Timestamp to string according to Hive TimestampWritable convention
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
index b961346dfc..8b6721d5d8 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala
@@ -231,7 +231,9 @@ class ExpressionEvaluationSuite extends FunSuite {
checkEvaluation("12.65" cast DecimalType, BigDecimal(12.65))
checkEvaluation(Literal(1) cast LongType, 1)
- checkEvaluation(Cast(Literal(1) cast TimestampType, LongType), 1)
+ checkEvaluation(Cast(Literal(1000) cast TimestampType, LongType), 1.toLong)
+ checkEvaluation(Cast(Literal(-1200) cast TimestampType, LongType), -2.toLong)
+ checkEvaluation(Cast(Literal(1.toDouble) cast TimestampType, DoubleType), 1.toDouble)
checkEvaluation(Cast(Literal(1.toDouble) cast TimestampType, DoubleType), 1.toDouble)
checkEvaluation(Cast(Literal(sts) cast TimestampType, StringType), sts)
@@ -242,11 +244,11 @@ class ExpressionEvaluationSuite extends FunSuite {
checkEvaluation(Cast(Cast(Cast(Cast(
Cast("5" cast ByteType, ShortType), IntegerType), FloatType), DoubleType), LongType), 5)
checkEvaluation(Cast(Cast(Cast(Cast(
- Cast("5" cast ByteType, TimestampType), DecimalType), LongType), StringType), ShortType), 5)
+ Cast("5" cast ByteType, TimestampType), DecimalType), LongType), StringType), ShortType), 0)
checkEvaluation(Cast(Cast(Cast(Cast(
Cast("5" cast TimestampType, ByteType), DecimalType), LongType), StringType), ShortType), null)
checkEvaluation(Cast(Cast(Cast(Cast(
- Cast("5" cast DecimalType, ByteType), TimestampType), LongType), StringType), ShortType), 5)
+ Cast("5" cast DecimalType, ByteType), TimestampType), LongType), StringType), ShortType), 0)
checkEvaluation(Literal(true) cast IntegerType, 1)
checkEvaluation(Literal(false) cast IntegerType, 0)
checkEvaluation(Cast(Literal(1) cast BooleanType, IntegerType), 1)
@@ -293,16 +295,18 @@ class ExpressionEvaluationSuite extends FunSuite {
test("timestamp casting") {
val millis = 15 * 1000 + 2
+ val seconds = millis * 1000 + 2
val ts = new Timestamp(millis)
val ts1 = new Timestamp(15 * 1000) // a timestamp without the milliseconds part
+ val tss = new Timestamp(seconds)
checkEvaluation(Cast(ts, ShortType), 15)
checkEvaluation(Cast(ts, IntegerType), 15)
checkEvaluation(Cast(ts, LongType), 15)
checkEvaluation(Cast(ts, FloatType), 15.002f)
checkEvaluation(Cast(ts, DoubleType), 15.002)
- checkEvaluation(Cast(Cast(ts, ShortType), TimestampType), ts1)
- checkEvaluation(Cast(Cast(ts, IntegerType), TimestampType), ts1)
- checkEvaluation(Cast(Cast(ts, LongType), TimestampType), ts1)
+ checkEvaluation(Cast(Cast(tss, ShortType), TimestampType), ts)
+ checkEvaluation(Cast(Cast(tss, IntegerType), TimestampType), ts)
+ checkEvaluation(Cast(Cast(tss, LongType), TimestampType), ts)
checkEvaluation(Cast(Cast(millis.toFloat / 1000, TimestampType), FloatType),
millis.toFloat / 1000)
checkEvaluation(Cast(Cast(millis.toDouble / 1000, TimestampType), DoubleType),
diff --git a/sql/hive/src/test/resources/golden/timestamp cast #1-0-69fc614ccea92bbe39f4decc299edcc6 b/sql/hive/src/test/resources/golden/timestamp cast #1-0-69fc614ccea92bbe39f4decc299edcc6
new file mode 100644
index 0000000000..8ebf695ba7
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/timestamp cast #1-0-69fc614ccea92bbe39f4decc299edcc6
@@ -0,0 +1 @@
+0.001
diff --git a/sql/hive/src/test/resources/golden/timestamp cast #2-0-732ed232ac592c5e7f7c913a88874fd2 b/sql/hive/src/test/resources/golden/timestamp cast #2-0-732ed232ac592c5e7f7c913a88874fd2
new file mode 100644
index 0000000000..5625e59da8
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/timestamp cast #2-0-732ed232ac592c5e7f7c913a88874fd2
@@ -0,0 +1 @@
+1.2
diff --git a/sql/hive/src/test/resources/golden/timestamp cast #3-0-76ee270337f664b36cacfc6528ac109 b/sql/hive/src/test/resources/golden/timestamp cast #3-0-76ee270337f664b36cacfc6528ac109
new file mode 100644
index 0000000000..d00491fd7e
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/timestamp cast #3-0-76ee270337f664b36cacfc6528ac109
@@ -0,0 +1 @@
+1
diff --git a/sql/hive/src/test/resources/golden/timestamp cast #4-0-732ed232ac592c5e7f7c913a88874fd2 b/sql/hive/src/test/resources/golden/timestamp cast #4-0-732ed232ac592c5e7f7c913a88874fd2
new file mode 100644
index 0000000000..5625e59da8
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/timestamp cast #4-0-732ed232ac592c5e7f7c913a88874fd2
@@ -0,0 +1 @@
+1.2
diff --git a/sql/hive/src/test/resources/golden/timestamp cast #5-0-dbd7bcd167d322d6617b884c02c7f247 b/sql/hive/src/test/resources/golden/timestamp cast #5-0-dbd7bcd167d322d6617b884c02c7f247
new file mode 100644
index 0000000000..27de46fdf2
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/timestamp cast #5-0-dbd7bcd167d322d6617b884c02c7f247
@@ -0,0 +1 @@
+-0.0010000000000000009
diff --git a/sql/hive/src/test/resources/golden/timestamp cast #6-0-6d2da5cfada03605834e38bc4075bc79 b/sql/hive/src/test/resources/golden/timestamp cast #6-0-6d2da5cfada03605834e38bc4075bc79
new file mode 100644
index 0000000000..1d94c8a014
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/timestamp cast #6-0-6d2da5cfada03605834e38bc4075bc79
@@ -0,0 +1 @@
+-1.2
diff --git a/sql/hive/src/test/resources/golden/timestamp cast #7-0-1d70654217035f8ce5f64344f4c5a80f b/sql/hive/src/test/resources/golden/timestamp cast #7-0-1d70654217035f8ce5f64344f4c5a80f
new file mode 100644
index 0000000000..3fbedf693b
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/timestamp cast #7-0-1d70654217035f8ce5f64344f4c5a80f
@@ -0,0 +1 @@
+-2
diff --git a/sql/hive/src/test/resources/golden/timestamp cast #8-0-6d2da5cfada03605834e38bc4075bc79 b/sql/hive/src/test/resources/golden/timestamp cast #8-0-6d2da5cfada03605834e38bc4075bc79
new file mode 100644
index 0000000000..1d94c8a014
--- /dev/null
+++ b/sql/hive/src/test/resources/golden/timestamp cast #8-0-6d2da5cfada03605834e38bc4075bc79
@@ -0,0 +1 @@
+-1.2
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 56bcd95eab..6fc891ba4c 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -303,6 +303,30 @@ class HiveQuerySuite extends HiveComparisonTest {
createQueryTest("case statements WITHOUT key #4",
"SELECT (CASE WHEN key > 2 THEN 3 WHEN 2 > key THEN 2 ELSE 0 END) FROM src WHERE key < 15")
+ createQueryTest("timestamp cast #1",
+ "SELECT CAST(CAST(1 AS TIMESTAMP) AS DOUBLE) FROM src LIMIT 1")
+
+ createQueryTest("timestamp cast #2",
+ "SELECT CAST(CAST(1.2 AS TIMESTAMP) AS DOUBLE) FROM src LIMIT 1")
+
+ createQueryTest("timestamp cast #3",
+ "SELECT CAST(CAST(1200 AS TIMESTAMP) AS INT) FROM src LIMIT 1")
+
+ createQueryTest("timestamp cast #4",
+ "SELECT CAST(CAST(1.2 AS TIMESTAMP) AS DOUBLE) FROM src LIMIT 1")
+
+ createQueryTest("timestamp cast #5",
+ "SELECT CAST(CAST(-1 AS TIMESTAMP) AS DOUBLE) FROM src LIMIT 1")
+
+ createQueryTest("timestamp cast #6",
+ "SELECT CAST(CAST(-1.2 AS TIMESTAMP) AS DOUBLE) FROM src LIMIT 1")
+
+ createQueryTest("timestamp cast #7",
+ "SELECT CAST(CAST(-1200 AS TIMESTAMP) AS INT) FROM src LIMIT 1")
+
+ createQueryTest("timestamp cast #8",
+ "SELECT CAST(CAST(-1.2 AS TIMESTAMP) AS DOUBLE) FROM src LIMIT 1")
+
test("implement identity function using case statement") {
val actual = sql("SELECT (CASE key WHEN key THEN key END) FROM src")
.map { case Row(i: Int) => i }