aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorYin Huai <yhuai@databricks.com>2015-02-12 15:17:25 -0800
committerMichael Armbrust <michael@databricks.com>2015-02-12 15:17:25 -0800
commitc352ffbdb9112714c176a747edff6115e9369e58 (patch)
tree5f46171bf3768b4c8aefbe4449db333d06a32862 /sql
parent0bf031582588723dd5a4ca42e6f9f36bc2da1a0b (diff)
downloadspark-c352ffbdb9112714c176a747edff6115e9369e58.tar.gz
spark-c352ffbdb9112714c176a747edff6115e9369e58.tar.bz2
spark-c352ffbdb9112714c176a747edff6115e9369e58.zip
[SPARK-5758][SQL] Use LongType as the default type for integers in JSON schema inference.
Author: Yin Huai <yhuai@databricks.com> Closes #4544 from yhuai/jsonUseLongTypeByDefault and squashes the following commits: 6e2ffc2 [Yin Huai] Use LongType as the default type for integers in JSON schema inference.
Diffstat (limited to 'sql')
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala12
-rw-r--r--sql/core/src/test/java/org/apache/spark/sql/api/java/JavaApplySchemaSuite.java2
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala16
3 files changed, 17 insertions, 13 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala
index 1043eefcfc..7dfb304021 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala
@@ -17,13 +17,12 @@
package org.apache.spark.sql.json
-import java.io.StringWriter
-import java.sql.{Date, Timestamp}
+import java.sql.Timestamp
import scala.collection.Map
import scala.collection.convert.Wrappers.{JMapWrapper, JListWrapper}
-import com.fasterxml.jackson.core.{JsonGenerator, JsonProcessingException, JsonFactory}
+import com.fasterxml.jackson.core.{JsonGenerator, JsonProcessingException}
import com.fasterxml.jackson.databind.ObjectMapper
import org.apache.spark.rdd.RDD
@@ -178,7 +177,12 @@ private[sql] object JsonRDD extends Logging {
}
private def typeOfPrimitiveValue: PartialFunction[Any, DataType] = {
- ScalaReflection.typeOfObject orElse {
+ // For Integer values, use LongType by default.
+ val useLongType: PartialFunction[Any, DataType] = {
+ case value: IntegerType.JvmType => LongType
+ }
+
+ useLongType orElse ScalaReflection.typeOfObject orElse {
// Since we do not have a data type backed by BigInteger,
// when we see a Java BigInteger, we use DecimalType.
case value: java.math.BigInteger => DecimalType.Unlimited
diff --git a/sql/core/src/test/java/org/apache/spark/sql/api/java/JavaApplySchemaSuite.java b/sql/core/src/test/java/org/apache/spark/sql/api/java/JavaApplySchemaSuite.java
index 2e6e977fdc..643b891ab1 100644
--- a/sql/core/src/test/java/org/apache/spark/sql/api/java/JavaApplySchemaSuite.java
+++ b/sql/core/src/test/java/org/apache/spark/sql/api/java/JavaApplySchemaSuite.java
@@ -164,7 +164,7 @@ public class JavaApplySchemaSuite implements Serializable {
fields.add(DataTypes.createStructField("bigInteger", DataTypes.createDecimalType(), true));
fields.add(DataTypes.createStructField("boolean", DataTypes.BooleanType, true));
fields.add(DataTypes.createStructField("double", DataTypes.DoubleType, true));
- fields.add(DataTypes.createStructField("integer", DataTypes.IntegerType, true));
+ fields.add(DataTypes.createStructField("integer", DataTypes.LongType, true));
fields.add(DataTypes.createStructField("long", DataTypes.LongType, true));
fields.add(DataTypes.createStructField("null", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("string", DataTypes.StringType, true));
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala
index fde4b47438..b5f13f8bd5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala
@@ -222,7 +222,7 @@ class JsonSuite extends QueryTest {
StructField("bigInteger", DecimalType.Unlimited, true) ::
StructField("boolean", BooleanType, true) ::
StructField("double", DoubleType, true) ::
- StructField("integer", IntegerType, true) ::
+ StructField("integer", LongType, true) ::
StructField("long", LongType, true) ::
StructField("null", StringType, true) ::
StructField("string", StringType, true) :: Nil)
@@ -252,7 +252,7 @@ class JsonSuite extends QueryTest {
StructField("arrayOfBigInteger", ArrayType(DecimalType.Unlimited, false), true) ::
StructField("arrayOfBoolean", ArrayType(BooleanType, false), true) ::
StructField("arrayOfDouble", ArrayType(DoubleType, false), true) ::
- StructField("arrayOfInteger", ArrayType(IntegerType, false), true) ::
+ StructField("arrayOfInteger", ArrayType(LongType, false), true) ::
StructField("arrayOfLong", ArrayType(LongType, false), true) ::
StructField("arrayOfNull", ArrayType(StringType, true), true) ::
StructField("arrayOfString", ArrayType(StringType, false), true) ::
@@ -265,7 +265,7 @@ class JsonSuite extends QueryTest {
StructField("field1", BooleanType, true) ::
StructField("field2", DecimalType.Unlimited, true) :: Nil), true) ::
StructField("structWithArrayFields", StructType(
- StructField("field1", ArrayType(IntegerType, false), true) ::
+ StructField("field1", ArrayType(LongType, false), true) ::
StructField("field2", ArrayType(StringType, false), true) :: Nil), true) :: Nil)
assert(expectedSchema === jsonDF.schema)
@@ -486,7 +486,7 @@ class JsonSuite extends QueryTest {
val jsonDF = jsonRDD(complexFieldValueTypeConflict)
val expectedSchema = StructType(
- StructField("array", ArrayType(IntegerType, false), true) ::
+ StructField("array", ArrayType(LongType, false), true) ::
StructField("num_struct", StringType, true) ::
StructField("str_array", StringType, true) ::
StructField("struct", StructType(
@@ -540,7 +540,7 @@ class JsonSuite extends QueryTest {
val expectedSchema = StructType(
StructField("a", BooleanType, true) ::
StructField("b", LongType, true) ::
- StructField("c", ArrayType(IntegerType, false), true) ::
+ StructField("c", ArrayType(LongType, false), true) ::
StructField("d", StructType(
StructField("field", BooleanType, true) :: Nil), true) ::
StructField("e", StringType, true) :: Nil)
@@ -560,7 +560,7 @@ class JsonSuite extends QueryTest {
StructField("bigInteger", DecimalType.Unlimited, true) ::
StructField("boolean", BooleanType, true) ::
StructField("double", DoubleType, true) ::
- StructField("integer", IntegerType, true) ::
+ StructField("integer", LongType, true) ::
StructField("long", LongType, true) ::
StructField("null", StringType, true) ::
StructField("string", StringType, true) :: Nil)
@@ -781,12 +781,12 @@ class JsonSuite extends QueryTest {
ArrayType(ArrayType(ArrayType(ArrayType(StringType, false), false), true), false), true) ::
StructField("field2",
ArrayType(ArrayType(
- StructType(StructField("Test", IntegerType, true) :: Nil), false), true), true) ::
+ StructType(StructField("Test", LongType, true) :: Nil), false), true), true) ::
StructField("field3",
ArrayType(ArrayType(
StructType(StructField("Test", StringType, true) :: Nil), true), false), true) ::
StructField("field4",
- ArrayType(ArrayType(ArrayType(IntegerType, false), true), false), true) :: Nil)
+ ArrayType(ArrayType(ArrayType(LongType, false), true), false), true) :: Nil)
assert(schema === jsonDF.schema)