aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
author: Kevin Cox <kevincox@kevincox.ca>, 2015-09-16 15:30:17 -0700
committer: Reynold Xin <rxin@databricks.com>, 2015-09-16 15:30:17 -0700
commit: d39f15ea2b8bed5342d2f8e3c1936f915c470783 (patch)
tree: df0f20c09e51dbdbbaa68465d7488e605d9dffcc /sql
parent: 896edb51ab7a88bbb31259e565311a9be6f2ca6d (diff)
download: spark-d39f15ea2b8bed5342d2f8e3c1936f915c470783.tar.gz
spark-d39f15ea2b8bed5342d2f8e3c1936f915c470783.tar.bz2
spark-d39f15ea2b8bed5342d2f8e3c1936f915c470783.zip
[SPARK-9794] [SQL] Fix datetime parsing in SparkSQL.
This fixes https://issues.apache.org/jira/browse/SPARK-9794 by using a real ISO 8601 parser (courtesy of the XML component of the standard Java library).

cc: angelini

Author: Kevin Cox <kevincox@kevincox.ca>

Closes #8396 from kevincox/kevincox-sql-time-parsing.
Diffstat (limited to 'sql')
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala | 27
-rw-r--r-- sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala | 32
2 files changed, 42 insertions, 17 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index 687ca000d1..400c4327be 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.util
import java.sql.{Date, Timestamp}
import java.text.{DateFormat, SimpleDateFormat}
import java.util.{TimeZone, Calendar}
+import javax.xml.bind.DatatypeConverter;
import org.apache.spark.unsafe.types.UTF8String
@@ -109,30 +110,22 @@ object DateTimeUtils {
}
def stringToTime(s: String): java.util.Date = {
- if (!s.contains('T')) {
+ var indexOfGMT = s.indexOf("GMT");
+ if (indexOfGMT != -1) {
+ // ISO8601 with a weird time zone specifier (2000-01-01T00:00GMT+01:00)
+ val s0 = s.substring(0, indexOfGMT)
+ val s1 = s.substring(indexOfGMT + 3)
+ // Mapped to 2000-01-01T00:00+01:00
+ stringToTime(s0 + s1)
+ } else if (!s.contains('T')) {
// JDBC escape string
if (s.contains(' ')) {
Timestamp.valueOf(s)
} else {
Date.valueOf(s)
}
- } else if (s.endsWith("Z")) {
- // this is zero timezone of ISO8601
- stringToTime(s.substring(0, s.length - 1) + "GMT-00:00")
- } else if (s.indexOf("GMT") == -1) {
- // timezone with ISO8601
- val inset = "+00.00".length
- val s0 = s.substring(0, s.length - inset)
- val s1 = s.substring(s.length - inset, s.length)
- if (s0.substring(s0.lastIndexOf(':')).contains('.')) {
- stringToTime(s0 + "GMT" + s1)
- } else {
- stringToTime(s0 + ".0GMT" + s1)
- }
} else {
- // ISO8601 with GMT insert
- val ISO8601GMT: SimpleDateFormat = new SimpleDateFormat( "yyyy-MM-dd'T'HH:mm:ss.SSSz" )
- ISO8601GMT.parse(s)
+ DatatypeConverter.parseDateTime(s).getTime()
}
}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
index 6b9a11f0ff..46335941b6 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala
@@ -136,6 +136,38 @@ class DateTimeUtilsSuite extends SparkFunSuite {
assert(stringToDate(UTF8String.fromString("2015-031-8")).isEmpty)
}
+ test("string to time") {
+ // Tests with UTC.
+ var c = Calendar.getInstance(TimeZone.getTimeZone("UTC"))
+ c.set(Calendar.MILLISECOND, 0)
+
+ c.set(1900, 0, 1, 0, 0, 0)
+ assert(stringToTime("1900-01-01T00:00:00GMT-00:00") === c.getTime())
+
+ c.set(2000, 11, 30, 10, 0, 0)
+ assert(stringToTime("2000-12-30T10:00:00Z") === c.getTime())
+
+ // Tests with set time zone.
+ c.setTimeZone(TimeZone.getTimeZone("GMT-04:00"))
+ c.set(Calendar.MILLISECOND, 0)
+
+ c.set(1900, 0, 1, 0, 0, 0)
+ assert(stringToTime("1900-01-01T00:00:00-04:00") === c.getTime())
+
+ c.set(1900, 0, 1, 0, 0, 0)
+ assert(stringToTime("1900-01-01T00:00:00GMT-04:00") === c.getTime())
+
+ // Tests with local time zone.
+ c.setTimeZone(TimeZone.getDefault())
+ c.set(Calendar.MILLISECOND, 0)
+
+ c.set(2000, 11, 30, 0, 0, 0)
+ assert(stringToTime("2000-12-30") === new Date(c.getTimeInMillis()))
+
+ c.set(2000, 11, 30, 10, 0, 0)
+ assert(stringToTime("2000-12-30 10:00:00") === new Timestamp(c.getTimeInMillis()))
+ }
+
test("string to timestamp") {
var c = Calendar.getInstance()
c.set(1969, 11, 31, 16, 0, 0)