diff options
author | Daoyuan Wang <daoyuan.wang@intel.com> | 2014-10-28 13:43:25 -0700 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2014-10-28 13:43:25 -0700 |
commit | 47a40f60d62ea69b659959994918d4c640f39d5b (patch) | |
tree | 67582dfaec3140d5e247a9170278e729b6af41c4 /sql/core/src | |
parent | 5807cb40ae178f0395c71b967f02aee853ef8bc9 (diff) | |
download | spark-47a40f60d62ea69b659959994918d4c640f39d5b.tar.gz spark-47a40f60d62ea69b659959994918d4c640f39d5b.tar.bz2 spark-47a40f60d62ea69b659959994918d4c640f39d5b.zip |
[SPARK-3988][SQL] add public API for date type
Add json and python api for date type.
By using Pickle, `java.sql.Date` was serialized as calendar, and recognized in python as `datetime.datetime`.
Author: Daoyuan Wang <daoyuan.wang@intel.com>
Closes #2901 from adrian-wang/spark3988 and squashes the following commits:
c51a24d [Daoyuan Wang] convert datetime to date
5670626 [Daoyuan Wang] minor line combine
f760d8e [Daoyuan Wang] fix indent
444f100 [Daoyuan Wang] fix a typo
1d74448 [Daoyuan Wang] fix scala style
8d7dd22 [Daoyuan Wang] add json and python api for date type
Diffstat (limited to 'sql/core/src')
6 files changed, 37 insertions, 15 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index c4f4ef01d7..ca8706ee68 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -444,6 +444,7 @@ class SQLContext(@transient val sparkContext: SparkContext) case ByteType => true case ShortType => true case FloatType => true + case DateType => true case TimestampType => true case ArrayType(_, _) => true case MapType(_, _, _) => true @@ -452,9 +453,9 @@ class SQLContext(@transient val sparkContext: SparkContext) } // Converts value to the type specified by the data type. - // Because Python does not have data types for TimestampType, FloatType, ShortType, and - // ByteType, we need to explicitly convert values in columns of these data types to the desired - // JVM data types. + // Because Python does not have data types for DateType, TimestampType, FloatType, ShortType, + // and ByteType, we need to explicitly convert values in columns of these data types to the + // desired JVM data types. def convert(obj: Any, dataType: DataType): Any = (obj, dataType) match { // TODO: We should check nullable case (null, _) => null @@ -474,6 +475,9 @@ class SQLContext(@transient val sparkContext: SparkContext) case (e, f) => convert(e, f.dataType) }): Row + case (c: java.util.Calendar, DateType) => + new java.sql.Date(c.getTime().getTime()) + case (c: java.util.Calendar, TimestampType) => new java.sql.Timestamp(c.getTime().getTime()) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala index bf32da1b71..047dc85df6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.json import scala.collection.Map import scala.collection.convert.Wrappers.{JMapWrapper, JListWrapper} import scala.math.BigDecimal -import java.sql.Timestamp +import java.sql.{Date, Timestamp} import com.fasterxml.jackson.core.JsonProcessingException import com.fasterxml.jackson.databind.ObjectMapper @@ -372,13 +372,20 @@ private[sql] object JsonRDD extends Logging { } } + private def toDate(value: Any): Date = { + value match { + // only support string as date + case value: java.lang.String => Date.valueOf(value) + } + } + private def toTimestamp(value: Any): Timestamp = { value match { - case value: java.lang.Integer => new Timestamp(value.asInstanceOf[Int].toLong) - case value: java.lang.Long => new Timestamp(value) - case value: java.lang.String => Timestamp.valueOf(value) - } - } + case value: java.lang.Integer => new Timestamp(value.asInstanceOf[Int].toLong) + case value: java.lang.Long => new Timestamp(value) + case value: java.lang.String => Timestamp.valueOf(value) + } + } private[json] def enforceCorrectType(value: Any, desiredType: DataType): Any ={ if (value == null) { @@ -396,6 +403,7 @@ private[sql] object JsonRDD extends Logging { case ArrayType(elementType, _) => value.asInstanceOf[Seq[Any]].map(enforceCorrectType(_, elementType)) case struct: StructType => asRow(value.asInstanceOf[Map[String, Any]], struct) + case DateType => toDate(value) case TimestampType => toTimestamp(value) } } diff --git a/sql/core/src/test/java/org/apache/spark/sql/api/java/JavaRowSuite.java b/sql/core/src/test/java/org/apache/spark/sql/api/java/JavaRowSuite.java index 52d07b5425..bc5cd66482 100644 --- a/sql/core/src/test/java/org/apache/spark/sql/api/java/JavaRowSuite.java +++ b/sql/core/src/test/java/org/apache/spark/sql/api/java/JavaRowSuite.java @@ -18,6 +18,7 @@ package org.apache.spark.sql.api.java; import java.math.BigDecimal; +import java.sql.Date; import java.sql.Timestamp; import java.util.Arrays; import java.util.HashMap; @@ -39,6 +40,7 @@ public class JavaRowSuite { private boolean booleanValue; private String stringValue; private byte[] binaryValue; + private Date dateValue; private Timestamp timestampValue; @Before @@ -53,6 +55,7 @@ public class JavaRowSuite { booleanValue = true; stringValue = "this is a string"; binaryValue = stringValue.getBytes(); + dateValue = Date.valueOf("2014-06-30"); timestampValue = Timestamp.valueOf("2014-06-30 09:20:00.0"); } @@ -76,6 +79,7 @@ public class JavaRowSuite { new Boolean(booleanValue), stringValue, // StringType binaryValue, // BinaryType + dateValue, // DateType timestampValue, // TimestampType null // null ); @@ -114,9 +118,10 @@ public class JavaRowSuite { Assert.assertEquals(stringValue, simpleRow.getString(15)); Assert.assertEquals(stringValue, simpleRow.get(15)); Assert.assertEquals(binaryValue, simpleRow.get(16)); - Assert.assertEquals(timestampValue, simpleRow.get(17)); - Assert.assertEquals(true, simpleRow.isNullAt(18)); - Assert.assertEquals(null, simpleRow.get(18)); + Assert.assertEquals(dateValue, simpleRow.get(17)); + Assert.assertEquals(timestampValue, simpleRow.get(18)); + Assert.assertEquals(true, simpleRow.isNullAt(19)); + Assert.assertEquals(null, simpleRow.get(19)); } @Test diff --git a/sql/core/src/test/java/org/apache/spark/sql/api/java/JavaSideDataTypeConversionSuite.java b/sql/core/src/test/java/org/apache/spark/sql/api/java/JavaSideDataTypeConversionSuite.java index d099a48a1f..d04396a5f8 100644 --- a/sql/core/src/test/java/org/apache/spark/sql/api/java/JavaSideDataTypeConversionSuite.java +++ b/sql/core/src/test/java/org/apache/spark/sql/api/java/JavaSideDataTypeConversionSuite.java @@ -39,6 +39,7 @@ public class JavaSideDataTypeConversionSuite { checkDataType(DataType.StringType); checkDataType(DataType.BinaryType); checkDataType(DataType.BooleanType); + checkDataType(DataType.DateType); checkDataType(DataType.TimestampType); checkDataType(DataType.DecimalType); checkDataType(DataType.DoubleType); diff --git a/sql/core/src/test/scala/org/apache/spark/sql/api/java/ScalaSideDataTypeConversionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/api/java/ScalaSideDataTypeConversionSuite.scala index ff1debff0f..8415af41be 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/api/java/ScalaSideDataTypeConversionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/api/java/ScalaSideDataTypeConversionSuite.scala @@ -38,6 +38,7 @@ class ScalaSideDataTypeConversionSuite extends FunSuite { checkDataType(org.apache.spark.sql.StringType) checkDataType(org.apache.spark.sql.BinaryType) checkDataType(org.apache.spark.sql.BooleanType) + checkDataType(org.apache.spark.sql.DateType) checkDataType(org.apache.spark.sql.TimestampType) checkDataType(org.apache.spark.sql.DecimalType) checkDataType(org.apache.spark.sql.DoubleType) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala index 1ae75546aa..ce6184f5d8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.SQLConf import org.apache.spark.sql.test.TestSQLContext import org.apache.spark.sql.test.TestSQLContext._ -import java.sql.Timestamp +import java.sql.{Date, Timestamp} class JsonSuite extends QueryTest { import TestJsonData._ @@ -58,8 +58,11 @@ class JsonSuite extends QueryTest { checkTypePromotion(new Timestamp(intNumber), enforceCorrectType(intNumber, TimestampType)) checkTypePromotion(new Timestamp(intNumber.toLong), enforceCorrectType(intNumber.toLong, TimestampType)) - val strDate = "2014-09-30 12:34:56" - checkTypePromotion(Timestamp.valueOf(strDate), enforceCorrectType(strDate, TimestampType)) + val strTime = "2014-09-30 12:34:56" + checkTypePromotion(Timestamp.valueOf(strTime), enforceCorrectType(strTime, TimestampType)) + + val strDate = "2014-10-15" + checkTypePromotion(Date.valueOf(strDate), enforceCorrectType(strDate, DateType)) } test("Get compatible type") { |