author     Kevin Yu <qyu@us.ibm.com>               2016-05-20 12:41:14 +0800
committer  Wenchen Fan <wenchen@databricks.com>    2016-05-20 12:41:14 +0800
commit     17591d90e6873f30a042112f56a1686726ccbd60 (patch)
tree       d155359ab9626077375c3531500475a294846416 /sql/core
parent     d5c47f8ff8c09ff017e896835db044661ee60909 (diff)
[SPARK-11827][SQL] Adding java.math.BigInteger support in Java type inference for POJOs and Java collections
Hello: Can you help check this PR? I am adding support for java.math.BigInteger in the Java bean code path. I saw that internally Spark already converts BigInteger to BigDecimal in ColumnType.scala and CatalystRowConverter.scala; I use the same approach and convert the BigInteger to a BigDecimal.

Author: Kevin Yu <qyu@us.ibm.com>

Closes #10125 from kevinyu98/working_on_spark-11827.
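For context, a minimal, self-contained sketch of the behavior this patch enables (illustrative only, not part of the patch; the BigIntegerBeanExample class, the "value" field, and the app name are made up): a java.math.BigInteger bean property is inferred as Spark's Decimal(38,0) type and is materialized as java.math.BigDecimal on read-back.

import java.io.Serializable;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.Collections;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class BigIntegerBeanExample {
  // Illustrative Java bean with a single java.math.BigInteger property.
  public static class BigIntegerBean implements Serializable {
    private BigInteger value = new BigInteger("1234567");
    public BigInteger getValue() { return value; }
    public void setValue(BigInteger value) { this.value = value; }
  }

  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
      .master("local[*]").appName("biginteger-sketch").getOrCreate();

    // Java type inference maps the BigInteger property to DecimalType(38,0).
    Dataset<Row> df = spark.createDataFrame(
      Collections.singletonList(new BigIntegerBean()), BigIntegerBean.class);
    df.printSchema();  // value: decimal(38,0)

    // On read-back the value comes out as java.math.BigDecimal, not BigInteger.
    BigDecimal v = df.first().getDecimal(0);
    System.out.println(v);  // 1234567

    spark.stop();
  }
}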
Diffstat (limited to 'sql/core')
-rw-r--r--  sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java        | 11 ++++++++++-
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/ScalaReflectionRelationSuite.scala | 10 +++++++---
2 files changed, 17 insertions(+), 4 deletions(-)
diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java
index 324ebbae38..35a9f44fec 100644
--- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java
+++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java
@@ -21,6 +21,8 @@ import java.io.Serializable;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.*;
+import java.math.BigInteger;
+import java.math.BigDecimal;
import scala.collection.JavaConverters;
import scala.collection.Seq;
@@ -130,6 +132,7 @@ public class JavaDataFrameSuite {
private Integer[] b = { 0, 1 };
private Map<String, int[]> c = ImmutableMap.of("hello", new int[] { 1, 2 });
private List<String> d = Arrays.asList("floppy", "disk");
+ private BigInteger e = new BigInteger("1234567");
public double getA() {
return a;
@@ -146,6 +149,8 @@ public class JavaDataFrameSuite {
public List<String> getD() {
return d;
}
+
+ public BigInteger getE() { return e; }
}
void validateDataFrameWithBeans(Bean bean, Dataset<Row> df) {
@@ -163,7 +168,9 @@ public class JavaDataFrameSuite {
Assert.assertEquals(
new StructField("d", new ArrayType(DataTypes.StringType, true), true, Metadata.empty()),
schema.apply("d"));
- Row first = df.select("a", "b", "c", "d").first();
+ Assert.assertEquals(new StructField("e", DataTypes.createDecimalType(38,0), true, Metadata.empty()),
+ schema.apply("e"));
+ Row first = df.select("a", "b", "c", "d", "e").first();
Assert.assertEquals(bean.getA(), first.getDouble(0), 0.0);
// Now Java lists and maps are converted to Scala Seq's and Map's. Once we get a Seq below,
// verify that it has the expected length, and contains expected elements.
@@ -182,6 +189,8 @@ public class JavaDataFrameSuite {
for (int i = 0; i < d.length(); i++) {
Assert.assertEquals(bean.getD().get(i), d.apply(i));
}
+ // java.math.BigInteger is equivalent to Spark Decimal(38,0)
+ Assert.assertEquals(new BigDecimal(bean.getE()), first.getDecimal(4));
}
@Test
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ScalaReflectionRelationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ScalaReflectionRelationSuite.scala
index 491bdb3ef9..c9bd05d0e4 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ScalaReflectionRelationSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ScalaReflectionRelationSuite.scala
@@ -34,7 +34,9 @@ case class ReflectData(
decimalField: java.math.BigDecimal,
date: Date,
timestampField: Timestamp,
- seqInt: Seq[Int])
+ seqInt: Seq[Int],
+ javaBigInt: java.math.BigInteger,
+ scalaBigInt: scala.math.BigInt)
case class NullReflectData(
intField: java.lang.Integer,
@@ -77,13 +79,15 @@ class ScalaReflectionRelationSuite extends SparkFunSuite with SharedSQLContext {
test("query case class RDD") {
val data = ReflectData("a", 1, 1L, 1.toFloat, 1.toDouble, 1.toShort, 1.toByte, true,
- new java.math.BigDecimal(1), Date.valueOf("1970-01-01"), new Timestamp(12345), Seq(1, 2, 3))
+ new java.math.BigDecimal(1), Date.valueOf("1970-01-01"), new Timestamp(12345), Seq(1, 2, 3),
+ new java.math.BigInteger("1"), scala.math.BigInt(1))
Seq(data).toDF().createOrReplaceTempView("reflectData")
assert(sql("SELECT * FROM reflectData").collect().head ===
Row("a", 1, 1L, 1.toFloat, 1.toDouble, 1.toShort, 1.toByte, true,
new java.math.BigDecimal(1), Date.valueOf("1970-01-01"),
- new Timestamp(12345), Seq(1, 2, 3)))
+ new Timestamp(12345), Seq(1, 2, 3), new java.math.BigDecimal(1),
+ new java.math.BigDecimal(1)))
}
test("query case class RDD with nulls") {