author     Daoyuan Wang <daoyuan.wang@intel.com>        2014-10-28 13:43:25 -0700
committer  Michael Armbrust <michael@databricks.com>    2014-10-28 13:43:25 -0700
commit     47a40f60d62ea69b659959994918d4c640f39d5b (patch)
tree       67582dfaec3140d5e247a9170278e729b6af41c4 /sql
parent     5807cb40ae178f0395c71b967f02aee853ef8bc9 (diff)
[SPARK-3988][SQL] add public API for date type
Add JSON and Python APIs for the date type. By using Pickle, `java.sql.Date` is serialized as a calendar and recognized in Python as `datetime.datetime`.

Author: Daoyuan Wang <daoyuan.wang@intel.com>

Closes #2901 from adrian-wang/spark3988 and squashes the following commits:

c51a24d [Daoyuan Wang] convert datetime to date
5670626 [Daoyuan Wang] minor line combine
f760d8e [Daoyuan Wang] fix indent
444f100 [Daoyuan Wang] fix a typo
1d74448 [Daoyuan Wang] fix scala style
8d7dd22 [Daoyuan Wang] add json and python api for date type
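For context, a minimal Scala sketch of the conversion path the message describes (not part of the patch; the `cal` input is made up): Pickle hands a Python datetime to the JVM as a `java.util.Calendar`, and the new `convert` rule added to `SQLContext` in the diff below turns that into a `java.sql.Date`.

import java.util.Calendar
import java.sql.Date

// A Python datetime arrives over Pickle as a java.util.Calendar.
val cal = Calendar.getInstance()
cal.clear()
cal.set(2014, Calendar.OCTOBER, 28)

// The DateType case added to SQLContext.convert goes through epoch
// milliseconds, mirroring the existing TimestampType rule.
val date = new Date(cal.getTime().getTime())
// date.toString is "2014-10-28" in the local time zone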
Diffstat (limited to 'sql')
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala | 1
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/dataTypes.scala | 4
-rw-r--r--  sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala | 9
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala | 10
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala | 20
-rw-r--r--  sql/core/src/test/java/org/apache/spark/sql/api/java/JavaRowSuite.java | 11
-rw-r--r--  sql/core/src/test/java/org/apache/spark/sql/api/java/JavaSideDataTypeConversionSuite.java | 1
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/api/java/ScalaSideDataTypeConversionSuite.scala | 1
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala | 9
9 files changed, 48 insertions(+), 18 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
index 7d930fccd5..d76c743d3f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
@@ -112,6 +112,7 @@ object ScalaReflection {
case obj: FloatType.JvmType => FloatType
case obj: DoubleType.JvmType => DoubleType
case obj: DecimalType.JvmType => DecimalType
+ case obj: DateType.JvmType => DateType
case obj: TimestampType.JvmType => TimestampType
case null => NullType
// For other cases, there is no obvious mapping from the type of the given object to a
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/dataTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/dataTypes.scala
index 0cf139ebde..b9cf37d53f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/dataTypes.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/dataTypes.scala
@@ -91,6 +91,7 @@ object DataType {
| "BinaryType" ^^^ BinaryType
| "BooleanType" ^^^ BooleanType
| "DecimalType" ^^^ DecimalType
+ | "DateType" ^^^ DateType
| "TimestampType" ^^^ TimestampType
)
@@ -198,7 +199,8 @@ trait PrimitiveType extends DataType {
}
object PrimitiveType {
- private[sql] val all = Seq(DecimalType, TimestampType, BinaryType) ++ NativeType.all
+ private[sql] val all = Seq(DecimalType, DateType, TimestampType, BinaryType) ++
+ NativeType.all
private[sql] val nameToType = all.map(t => t.typeName -> t).toMap
}
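Aside: the `^^^` in the parser hunk above comes from Scala's parser combinators; it discards the matched text and returns a fixed value, which is how the string "DateType" parses to the DateType object. A self-contained sketch under that assumption (`TypeParser` and its local `DataType` hierarchy are illustrative stand-ins, not catalyst's actual code):

import scala.util.parsing.combinator.RegexParsers

// Illustrative stand-in for catalyst's string-to-DataType parser.
sealed trait DataType
case object DateType extends DataType
case object TimestampType extends DataType

object TypeParser extends RegexParsers {
  // ^^^ replaces a successful match with the value on its right.
  def primitiveType: Parser[DataType] =
    "DateType" ^^^ DateType | "TimestampType" ^^^ TimestampType
}

// TypeParser.parseAll(TypeParser.primitiveType, "DateType").get == DateType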
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala
index 488e373854..430f0664b7 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala
@@ -18,7 +18,7 @@
package org.apache.spark.sql.catalyst
import java.math.BigInteger
-import java.sql.Timestamp
+import java.sql.{Date, Timestamp}
import org.scalatest.FunSuite
@@ -43,6 +43,7 @@ case class NullableData(
booleanField: java.lang.Boolean,
stringField: String,
decimalField: BigDecimal,
+ dateField: Date,
timestampField: Timestamp,
binaryField: Array[Byte])
@@ -96,6 +97,7 @@ class ScalaReflectionSuite extends FunSuite {
StructField("booleanField", BooleanType, nullable = true),
StructField("stringField", StringType, nullable = true),
StructField("decimalField", DecimalType, nullable = true),
+ StructField("dateField", DateType, nullable = true),
StructField("timestampField", TimestampType, nullable = true),
StructField("binaryField", BinaryType, nullable = true))),
nullable = true))
@@ -199,8 +201,11 @@ class ScalaReflectionSuite extends FunSuite {
// DecimalType
assert(DecimalType === typeOfObject(BigDecimal("1.7976931348623157E318")))
+ // DateType
+ assert(DateType === typeOfObject(Date.valueOf("2014-07-25")))
+
// TimestampType
- assert(TimestampType === typeOfObject(java.sql.Timestamp.valueOf("2014-07-25 10:26:00")))
+ assert(TimestampType === typeOfObject(Timestamp.valueOf("2014-07-25 10:26:00")))
// NullType
assert(NullType === typeOfObject(null))
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index c4f4ef01d7..ca8706ee68 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -444,6 +444,7 @@ class SQLContext(@transient val sparkContext: SparkContext)
case ByteType => true
case ShortType => true
case FloatType => true
+ case DateType => true
case TimestampType => true
case ArrayType(_, _) => true
case MapType(_, _, _) => true
@@ -452,9 +453,9 @@ class SQLContext(@transient val sparkContext: SparkContext)
}
// Converts value to the type specified by the data type.
- // Because Python does not have data types for TimestampType, FloatType, ShortType, and
- // ByteType, we need to explicitly convert values in columns of these data types to the desired
- // JVM data types.
+ // Because Python does not have data types for DateType, TimestampType, FloatType, ShortType,
+ // and ByteType, we need to explicitly convert values in columns of these data types to the
+ // desired JVM data types.
def convert(obj: Any, dataType: DataType): Any = (obj, dataType) match {
// TODO: We should check nullable
case (null, _) => null
@@ -474,6 +475,9 @@ class SQLContext(@transient val sparkContext: SparkContext)
case (e, f) => convert(e, f.dataType)
}): Row
+ case (c: java.util.Calendar, DateType) =>
+ new java.sql.Date(c.getTime().getTime())
+
case (c: java.util.Calendar, TimestampType) =>
new java.sql.Timestamp(c.getTime().getTime())
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala
index bf32da1b71..047dc85df6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala
@@ -20,7 +20,7 @@ package org.apache.spark.sql.json
import scala.collection.Map
import scala.collection.convert.Wrappers.{JMapWrapper, JListWrapper}
import scala.math.BigDecimal
-import java.sql.Timestamp
+import java.sql.{Date, Timestamp}
import com.fasterxml.jackson.core.JsonProcessingException
import com.fasterxml.jackson.databind.ObjectMapper
@@ -372,13 +372,20 @@ private[sql] object JsonRDD extends Logging {
}
}
+ private def toDate(value: Any): Date = {
+ value match {
+ // only support string as date
+ case value: java.lang.String => Date.valueOf(value)
+ }
+ }
+
private def toTimestamp(value: Any): Timestamp = {
value match {
- case value: java.lang.Integer => new Timestamp(value.asInstanceOf[Int].toLong)
- case value: java.lang.Long => new Timestamp(value)
- case value: java.lang.String => Timestamp.valueOf(value)
- }
- }
+ case value: java.lang.Integer => new Timestamp(value.asInstanceOf[Int].toLong)
+ case value: java.lang.Long => new Timestamp(value)
+ case value: java.lang.String => Timestamp.valueOf(value)
+ }
+ }
private[json] def enforceCorrectType(value: Any, desiredType: DataType): Any ={
if (value == null) {
@@ -396,6 +403,7 @@ private[sql] object JsonRDD extends Logging {
case ArrayType(elementType, _) =>
value.asInstanceOf[Seq[Any]].map(enforceCorrectType(_, elementType))
case struct: StructType => asRow(value.asInstanceOf[Map[String, Any]], struct)
+ case DateType => toDate(value)
case TimestampType => toTimestamp(value)
}
}
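A minimal sketch of what the new `toDate` coercion accepts: JSON has no native date type, so only string values are supported, and they must be in the yyyy-MM-dd form that `java.sql.Date.valueOf` expects. The standalone `toDate` below mirrors, rather than reuses, the private method added above:

import java.sql.Date

// Mirrors the private JsonRDD.toDate above: only strings are coerced.
def toDate(value: Any): Date = value match {
  case s: String => Date.valueOf(s) // expects "yyyy-MM-dd"
  // anything else falls through and throws a MatchError, as in the patch
}

// toDate("2014-10-15") == Date.valueOf("2014-10-15")
// toDate(42)           // MatchError: JSON numbers are not coerced to dates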
diff --git a/sql/core/src/test/java/org/apache/spark/sql/api/java/JavaRowSuite.java b/sql/core/src/test/java/org/apache/spark/sql/api/java/JavaRowSuite.java
index 52d07b5425..bc5cd66482 100644
--- a/sql/core/src/test/java/org/apache/spark/sql/api/java/JavaRowSuite.java
+++ b/sql/core/src/test/java/org/apache/spark/sql/api/java/JavaRowSuite.java
@@ -18,6 +18,7 @@
package org.apache.spark.sql.api.java;
import java.math.BigDecimal;
+import java.sql.Date;
import java.sql.Timestamp;
import java.util.Arrays;
import java.util.HashMap;
@@ -39,6 +40,7 @@ public class JavaRowSuite {
private boolean booleanValue;
private String stringValue;
private byte[] binaryValue;
+ private Date dateValue;
private Timestamp timestampValue;
@Before
@@ -53,6 +55,7 @@ public class JavaRowSuite {
booleanValue = true;
stringValue = "this is a string";
binaryValue = stringValue.getBytes();
+ dateValue = Date.valueOf("2014-06-30");
timestampValue = Timestamp.valueOf("2014-06-30 09:20:00.0");
}
@@ -76,6 +79,7 @@ public class JavaRowSuite {
new Boolean(booleanValue),
stringValue, // StringType
binaryValue, // BinaryType
+ dateValue, // DateType
timestampValue, // TimestampType
null // null
);
@@ -114,9 +118,10 @@ public class JavaRowSuite {
Assert.assertEquals(stringValue, simpleRow.getString(15));
Assert.assertEquals(stringValue, simpleRow.get(15));
Assert.assertEquals(binaryValue, simpleRow.get(16));
- Assert.assertEquals(timestampValue, simpleRow.get(17));
- Assert.assertEquals(true, simpleRow.isNullAt(18));
- Assert.assertEquals(null, simpleRow.get(18));
+ Assert.assertEquals(dateValue, simpleRow.get(17));
+ Assert.assertEquals(timestampValue, simpleRow.get(18));
+ Assert.assertEquals(true, simpleRow.isNullAt(19));
+ Assert.assertEquals(null, simpleRow.get(19));
}
@Test
diff --git a/sql/core/src/test/java/org/apache/spark/sql/api/java/JavaSideDataTypeConversionSuite.java b/sql/core/src/test/java/org/apache/spark/sql/api/java/JavaSideDataTypeConversionSuite.java
index d099a48a1f..d04396a5f8 100644
--- a/sql/core/src/test/java/org/apache/spark/sql/api/java/JavaSideDataTypeConversionSuite.java
+++ b/sql/core/src/test/java/org/apache/spark/sql/api/java/JavaSideDataTypeConversionSuite.java
@@ -39,6 +39,7 @@ public class JavaSideDataTypeConversionSuite {
checkDataType(DataType.StringType);
checkDataType(DataType.BinaryType);
checkDataType(DataType.BooleanType);
+ checkDataType(DataType.DateType);
checkDataType(DataType.TimestampType);
checkDataType(DataType.DecimalType);
checkDataType(DataType.DoubleType);
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/api/java/ScalaSideDataTypeConversionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/api/java/ScalaSideDataTypeConversionSuite.scala
index ff1debff0f..8415af41be 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/api/java/ScalaSideDataTypeConversionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/api/java/ScalaSideDataTypeConversionSuite.scala
@@ -38,6 +38,7 @@ class ScalaSideDataTypeConversionSuite extends FunSuite {
checkDataType(org.apache.spark.sql.StringType)
checkDataType(org.apache.spark.sql.BinaryType)
checkDataType(org.apache.spark.sql.BooleanType)
+ checkDataType(org.apache.spark.sql.DateType)
checkDataType(org.apache.spark.sql.TimestampType)
checkDataType(org.apache.spark.sql.DecimalType)
checkDataType(org.apache.spark.sql.DoubleType)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala
index 1ae75546aa..ce6184f5d8 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala
@@ -25,7 +25,7 @@ import org.apache.spark.sql.SQLConf
import org.apache.spark.sql.test.TestSQLContext
import org.apache.spark.sql.test.TestSQLContext._
-import java.sql.Timestamp
+import java.sql.{Date, Timestamp}
class JsonSuite extends QueryTest {
import TestJsonData._
@@ -58,8 +58,11 @@ class JsonSuite extends QueryTest {
checkTypePromotion(new Timestamp(intNumber), enforceCorrectType(intNumber, TimestampType))
checkTypePromotion(new Timestamp(intNumber.toLong),
enforceCorrectType(intNumber.toLong, TimestampType))
- val strDate = "2014-09-30 12:34:56"
- checkTypePromotion(Timestamp.valueOf(strDate), enforceCorrectType(strDate, TimestampType))
+ val strTime = "2014-09-30 12:34:56"
+ checkTypePromotion(Timestamp.valueOf(strTime), enforceCorrectType(strTime, TimestampType))
+
+ val strDate = "2014-10-15"
+ checkTypePromotion(Date.valueOf(strDate), enforceCorrectType(strDate, DateType))
}
test("Get compatible type") {