aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorDavies Liu <davies@databricks.com>2015-09-14 14:10:54 -0700
committerDavies Liu <davies.liu@gmail.com>2015-09-14 14:20:49 -0700
commit7e32387ae6303fd1cd32389d47df87170b841c67 (patch)
tree2b5e48032c481b62816c8a2e994b5b605851e59d /sql
parent8a634e9bcc671167613fb575c6c0c054fb4b3479 (diff)
downloadspark-7e32387ae6303fd1cd32389d47df87170b841c67.tar.gz
spark-7e32387ae6303fd1cd32389d47df87170b841c67.tar.bz2
spark-7e32387ae6303fd1cd32389d47df87170b841c67.zip
[SPARK-10522] [SQL] Nanoseconds of Timestamp in Parquet should be positive
Or Hive can't read it back correctly. Thanks vanzin for reporting this. Author: Davies Liu <davies@databricks.com> Closes #8674 from davies/positive_nano.
Diffstat (limited to 'sql')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala12
-rw-r--r--sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala17
2 files changed, 15 insertions, 14 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index d652fce3fd..687ca000d1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -42,6 +42,7 @@ object DateTimeUtils {
final val SECONDS_PER_DAY = 60 * 60 * 24L
final val MICROS_PER_SECOND = 1000L * 1000L
final val NANOS_PER_SECOND = MICROS_PER_SECOND * 1000L
+ final val MICROS_PER_DAY = MICROS_PER_SECOND * SECONDS_PER_DAY
final val MILLIS_PER_DAY = SECONDS_PER_DAY * 1000L
@@ -190,13 +191,14 @@ object DateTimeUtils {
/**
* Returns Julian day and nanoseconds in a day from the number of microseconds
+ *
+ * Note: support timestamp since 4717 BC (without negative nanoseconds, compatible with Hive).
*/
def toJulianDay(us: SQLTimestamp): (Int, Long) = {
- val seconds = us / MICROS_PER_SECOND
- val day = seconds / SECONDS_PER_DAY + JULIAN_DAY_OF_EPOCH
- val secondsInDay = seconds % SECONDS_PER_DAY
- val nanos = (us % MICROS_PER_SECOND) * 1000L
- (day.toInt, secondsInDay * NANOS_PER_SECOND + nanos)
+ val julian_us = us + JULIAN_DAY_OF_EPOCH * MICROS_PER_DAY
+ val day = julian_us / MICROS_PER_DAY
+ val micros = julian_us % MICROS_PER_DAY
+ (day.toInt, micros * 1000L)
}
/**
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
index 1596bb79fa..6b9a11f0ff 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
@@ -52,15 +52,14 @@ class DateTimeUtilsSuite extends SparkFunSuite {
assert(ns === 0)
assert(fromJulianDay(d, ns) == 0L)
- val t = Timestamp.valueOf("2015-06-11 10:10:10.100")
- val (d1, ns1) = toJulianDay(fromJavaTimestamp(t))
- val t1 = toJavaTimestamp(fromJulianDay(d1, ns1))
- assert(t.equals(t1))
-
- val t2 = Timestamp.valueOf("2015-06-11 20:10:10.100")
- val (d2, ns2) = toJulianDay(fromJavaTimestamp(t2))
- val t22 = toJavaTimestamp(fromJulianDay(d2, ns2))
- assert(t2.equals(t22))
+ Seq(Timestamp.valueOf("2015-06-11 10:10:10.100"),
+ Timestamp.valueOf("2015-06-11 20:10:10.100"),
+ Timestamp.valueOf("1900-06-11 20:10:10.100")).foreach { t =>
+ val (d, ns) = toJulianDay(fromJavaTimestamp(t))
+ assert(ns > 0)
+ val t1 = toJavaTimestamp(fromJulianDay(d, ns))
+ assert(t.equals(t1))
+ }
}
test("SPARK-6785: java date conversion before and after epoch") {