author     Daoyuan Wang <daoyuan.wang@intel.com>    2014-11-10 17:26:03 -0800
committer  Michael Armbrust <michael@databricks.com>    2014-11-10 17:26:03 -0800
commit     a1fc059b69c9ed150bf8a284404cc149ddaa27d6 (patch)
tree       33d50edfe84ef6afd9a252e68525e1a3330ea1db /sql
parent     fa777833b52b6f339cdc335e8e3935cfe9a2a7eb (diff)
[SPARK-4149][SQL] ISO 8601 support for json date time strings
This implements the feature davies mentioned in https://github.com/apache/spark/pull/2901#discussion-diff-19313312

Author: Daoyuan Wang <daoyuan.wang@intel.com>

Closes #3012 from adrian-wang/iso8601 and squashes the following commits:

50df6e7 [Daoyuan Wang] json data timestamp ISO8601 support
Diffstat (limited to 'sql')
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala                     5
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/types/util/DataTypeConversions.scala  30
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala                    7
3 files changed, 40 insertions(+), 2 deletions(-)
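Before the per-file diffs, a usage sketch (not part of this commit) of what the change enables. Against the contemporary SQLContext API, a user-supplied schema can now map ISO 8601 strings onto TimestampType; the column name and sample record here are illustrative:

import org.apache.spark.sql.{SQLContext, StructField, StructType, TimestampType}

val sqlContext = new SQLContext(sc)  // assumes an existing SparkContext sc
val json = sc.parallelize("""{"ts": "1970-01-01T01:00:01.0Z"}""" :: Nil)
val schema = StructType(StructField("ts", TimestampType, true) :: Nil)
// Before this patch the String case fell through to Timestamp.valueOf,
// which rejects ISO 8601 strings; now it is routed through stringToTime.
val table = sqlContext.jsonRDD(json, schema)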
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala
index 0f2dcdcacf..d9d7a3fea3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala
@@ -18,6 +18,7 @@
 package org.apache.spark.sql.json
 
 import org.apache.spark.sql.catalyst.types.decimal.Decimal
+import org.apache.spark.sql.types.util.DataTypeConversions
 
 import scala.collection.Map
 import scala.collection.convert.Wrappers.{JMapWrapper, JListWrapper}
@@ -378,7 +379,7 @@ private[sql] object JsonRDD extends Logging {
   private def toDate(value: Any): Date = {
     value match {
       // only support string as date
-      case value: java.lang.String => Date.valueOf(value)
+      case value: java.lang.String => new Date(DataTypeConversions.stringToTime(value).getTime)
     }
   }
 
@@ -386,7 +387,7 @@ private[sql] object JsonRDD extends Logging {
     value match {
       case value: java.lang.Integer => new Timestamp(value.asInstanceOf[Int].toLong)
       case value: java.lang.Long => new Timestamp(value)
-      case value: java.lang.String => Timestamp.valueOf(value)
+      case value: java.lang.String => toTimestamp(DataTypeConversions.stringToTime(value).getTime)
     }
   }
 
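Both rewritten cases follow the same chain: stringToTime yields a java.util.Date, and its getTime epoch milliseconds are rewrapped into the java.sql type the reader expects. In the timestamp hunk, the String case simply re-enters toTimestamp with a Long, which the existing Long case wraps. A minimal sketch of that chain (standalone illustration, not JsonRDD's private helpers):

import java.sql.{Date, Timestamp}
import org.apache.spark.sql.types.util.DataTypeConversions

val millis = DataTypeConversions.stringToTime("1970-01-01T01:00:01.0Z").getTime
val ts = new Timestamp(millis)  // what the Long case of toTimestamp produces
val d  = new Date(millis)       // what the new toDate case produces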
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/types/util/DataTypeConversions.scala b/sql/core/src/main/scala/org/apache/spark/sql/types/util/DataTypeConversions.scala
index 9aad7b3df4..d4258156f1 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/types/util/DataTypeConversions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/types/util/DataTypeConversions.scala
@@ -17,6 +17,8 @@
 package org.apache.spark.sql.types.util
 
+import java.text.SimpleDateFormat
+
 import scala.collection.JavaConverters._
 
 import org.apache.spark.sql._
@@ -129,6 +131,34 @@ protected[sql] object DataTypeConversions {
     StructType(structType.getFields.map(asScalaStructField))
   }
 
+  def stringToTime(s: String): java.util.Date = {
+    if (!s.contains('T')) {
+      // JDBC escape string
+      if (s.contains(' ')) {
+        java.sql.Timestamp.valueOf(s)
+      } else {
+        java.sql.Date.valueOf(s)
+      }
+    } else if (s.endsWith("Z")) {
+      // this is zero timezone of ISO8601
+      stringToTime(s.substring(0, s.length - 1) + "GMT-00:00")
+    } else if (s.indexOf("GMT") == -1) {
+      // timezone with ISO8601
+      val inset = "+00.00".length
+      val s0 = s.substring(0, s.length - inset)
+      val s1 = s.substring(s.length - inset, s.length)
+      if (s0.substring(s0.lastIndexOf(':')).contains('.')) {
+        stringToTime(s0 + "GMT" + s1)
+      } else {
+        stringToTime(s0 + ".0GMT" + s1)
+      }
+    } else {
+      // ISO8601 with GMT insert
+      val ISO8601GMT: SimpleDateFormat = new SimpleDateFormat( "yyyy-MM-dd'T'HH:mm:ss.SSSz" )
+      ISO8601GMT.parse(s)
+    }
+  }
+
   /** Converts Java objects to catalyst rows / types */
   def convertJavaToCatalyst(a: Any, dataType: DataType): Any = (a, dataType) match {
     case (obj, udt: UserDefinedType[_]) => ScalaReflection.convertToCatalyst(obj, udt) // Scala type
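For experimentation outside Spark, here is a self-contained mirror of the new helper (a sketch, not the committed code: splitAt replaces the two substring calls, and the offset-length literal is written "+00:00" rather than the patch's "+00.00"; both are six characters and only the length is used). The expected values come from the test below.

import java.text.SimpleDateFormat

def stringToTime(s: String): java.util.Date = {
  if (!s.contains('T')) {
    // no 'T': a JDBC escape string, "yyyy-mm-dd hh:mm:ss[.f...]" or "yyyy-mm-dd"
    if (s.contains(' ')) java.sql.Timestamp.valueOf(s)
    else java.sql.Date.valueOf(s)
  } else if (s.endsWith("Z")) {
    // ISO 8601 "Z" (UTC): rewrite to an explicit zero offset and re-parse
    stringToTime(s.substring(0, s.length - 1) + "GMT-00:00")
  } else if (s.indexOf("GMT") == -1) {
    // ISO 8601 numeric offset such as "-01:00": splice in "GMT" (and ".0"
    // milliseconds when absent) so SimpleDateFormat's general time zone
    // pattern letter z can parse it
    val inset = "+00:00".length
    val (s0, s1) = s.splitAt(s.length - inset)
    if (s0.substring(s0.lastIndexOf(':')).contains('.')) stringToTime(s0 + "GMT" + s1)
    else stringToTime(s0 + ".0GMT" + s1)
  } else {
    // fully normalized form, e.g. "1970-01-01T02:00:01.0GMT-01:00"
    new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz").parse(s)
  }
}

stringToTime("1970-01-01T01:00:01.0Z").getTime    // 3601000
stringToTime("1970-01-01T02:00:01-01:00").getTime // 10801000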
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala
index cade244f7a..f8ca2c773d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala
@@ -66,6 +66,13 @@ class JsonSuite extends QueryTest {
 
     val strDate = "2014-10-15"
     checkTypePromotion(Date.valueOf(strDate), enforceCorrectType(strDate, DateType))
+
+    val ISO8601Time1 = "1970-01-01T01:00:01.0Z"
+    checkTypePromotion(new Timestamp(3601000), enforceCorrectType(ISO8601Time1, TimestampType))
+    checkTypePromotion(new Date(3601000), enforceCorrectType(ISO8601Time1, DateType))
+    val ISO8601Time2 = "1970-01-01T02:00:01-01:00"
+    checkTypePromotion(new Timestamp(10801000), enforceCorrectType(ISO8601Time2, TimestampType))
+    checkTypePromotion(new Date(10801000), enforceCorrectType(ISO8601Time2, DateType))
   }
 
   test("Get compatible type") {
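The expected literals in the new assertions are plain epoch arithmetic: "1970-01-01T01:00:01.0Z" is one hour and one second past the epoch, and "1970-01-01T02:00:01-01:00" is the same wall-clock reading at offset -01:00, i.e. 03:00:01 UTC:

// sanity arithmetic behind the expected millisecond literals (illustrative)
val t1 = (1 * 3600 + 1) * 1000L        // 01:00:01Z          -> 3601000
val t2 = ((2 + 1) * 3600 + 1) * 1000L  // 02:00:01 at -01:00 -> 10801000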