author    Cheng Lian <lian@databricks.com>  2015-12-22 19:41:44 +0800
committer Cheng Lian <lian@databricks.com>  2015-12-22 19:41:44 +0800
commit    42bfde29836529251a4337ea8cfc539c9c8b04b8 (patch)
tree      d631e1909fc8dc1f94bd0d3bc6e33cdc57b786c7 /sql/core/src/test/java
parent    8c1b867cee816d0943184c7b485cd11e255d8130 (diff)
[SPARK-12371][SQL] Runtime nullability check for NewInstance
This PR adds a new expression `AssertNotNull` to ensure non-nullable fields of products and case classes don't receive null values at runtime.

Author: Cheng Lian <lian@databricks.com>

Closes #10331 from liancheng/dataset-nullability-check.
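For context, the behavior the patch introduces can be illustrated with a minimal, hypothetical sketch (not part of this commit). The names `NullabilityCheckSketch` and `PointBean`, and the standalone SparkContext/SQLContext setup, are invented for illustration; the test added below exercises the same decode path through the suite's shared TestSQLContext instead.

import java.io.Serializable;
import java.util.Collections;

import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;

public class NullabilityCheckSketch {

  // Hypothetical bean with a primitive (hence non-nullable) field.
  public static class PointBean implements Serializable {
    private int x;
    public int getX() { return x; }
    public void setX(int x) { this.x = x; }
  }

  public static void main(String[] args) {
    SparkContext sc = new SparkContext(
        new SparkConf().setMaster("local[1]").setAppName("nullability-sketch"));
    SQLContext context = new SQLContext(sc);

    // A nullable column that will be decoded into the bean's primitive int field.
    StructType schema = new StructType().add("x", DataTypes.IntegerType, true);
    Row row = RowFactory.create(new Object[] { null });  // null in the slot decoded to int

    DataFrame df = context.createDataFrame(Collections.singletonList(row), schema);
    Dataset<PointBean> ds = df.as(Encoders.bean(PointBean.class));

    // With the runtime check in place, collect() fails fast with a RuntimeException
    // naming the non-nullable field, rather than letting the null slip through.
    ds.collect();
  }
}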
Diffstat (limited to 'sql/core/src/test/java')
-rw-r--r--  sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java  126
1 file changed, 125 insertions(+), 1 deletion(-)
diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java
index 0dbaeb81c7..9f8db39e33 100644
--- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java
+++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java
@@ -23,6 +23,8 @@ import java.sql.Date;
import java.sql.Timestamp;
import java.util.*;
+import com.google.common.base.Objects;
+import org.junit.rules.ExpectedException;
import scala.Tuple2;
import scala.Tuple3;
import scala.Tuple4;
@@ -39,7 +41,6 @@ import org.apache.spark.sql.expressions.Aggregator;
import org.apache.spark.sql.test.TestSQLContext;
import org.apache.spark.sql.catalyst.encoders.OuterScopes;
import org.apache.spark.sql.catalyst.expressions.GenericRow;
-import org.apache.spark.sql.types.DecimalType;
import org.apache.spark.sql.types.StructType;
import static org.apache.spark.sql.functions.*;
@@ -741,4 +742,127 @@ public class JavaDatasetSuite implements Serializable {
context.createDataset(Arrays.asList(obj), Encoders.bean(SimpleJavaBean2.class));
ds.collect();
}
+
+ public class SmallBean implements Serializable {
+ private String a;
+
+ private int b;
+
+ public int getB() {
+ return b;
+ }
+
+ public void setB(int b) {
+ this.b = b;
+ }
+
+ public String getA() {
+ return a;
+ }
+
+ public void setA(String a) {
+ this.a = a;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+ SmallBean smallBean = (SmallBean) o;
+ return b == smallBean.b && Objects.equal(a, smallBean.a);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hashCode(a, b);
+ }
+ }
+
+ public class NestedSmallBean implements Serializable {
+ private SmallBean f;
+
+ public SmallBean getF() {
+ return f;
+ }
+
+ public void setF(SmallBean f) {
+ this.f = f;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+ NestedSmallBean that = (NestedSmallBean) o;
+ return Objects.equal(f, that.f);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hashCode(f);
+ }
+ }
+
+ @Rule
+ public transient ExpectedException nullabilityCheck = ExpectedException.none();
+
+ @Test
+ public void testRuntimeNullabilityCheck() {
+ OuterScopes.addOuterScope(this);
+
+ StructType schema = new StructType()
+ .add("f", new StructType()
+ .add("a", StringType, true)
+ .add("b", IntegerType, true), true);
+
+ // Shouldn't throw runtime exception since it passes nullability check.
+ {
+ Row row = new GenericRow(new Object[] {
+ new GenericRow(new Object[] {
+ "hello", 1
+ })
+ });
+
+ DataFrame df = context.createDataFrame(Collections.singletonList(row), schema);
+ Dataset<NestedSmallBean> ds = df.as(Encoders.bean(NestedSmallBean.class));
+
+ SmallBean smallBean = new SmallBean();
+ smallBean.setA("hello");
+ smallBean.setB(1);
+
+ NestedSmallBean nestedSmallBean = new NestedSmallBean();
+ nestedSmallBean.setF(smallBean);
+
+ Assert.assertEquals(ds.collectAsList(), Collections.singletonList(nestedSmallBean));
+ }
+
+ // Shouldn't throw runtime exception when the parent object (the nested `SmallBean`) is null
+ {
+ Row row = new GenericRow(new Object[] { null });
+
+ DataFrame df = context.createDataFrame(Collections.singletonList(row), schema);
+ Dataset<NestedSmallBean> ds = df.as(Encoders.bean(NestedSmallBean.class));
+
+ NestedSmallBean nestedSmallBean = new NestedSmallBean();
+ Assert.assertEquals(ds.collectAsList(), Collections.singletonList(nestedSmallBean));
+ }
+
+ nullabilityCheck.expect(RuntimeException.class);
+ nullabilityCheck.expectMessage(
+ "Null value appeared in non-nullable field " +
+ "test.org.apache.spark.sql.JavaDatasetSuite$SmallBean.b of type int.");
+
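+ // Should throw a runtime exception since the non-nullable `int` field `b` receives a null value.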
+ {
+ Row row = new GenericRow(new Object[] {
+ new GenericRow(new Object[] {
+ "hello", null
+ })
+ });
+
+ DataFrame df = context.createDataFrame(Collections.singletonList(row), schema);
+ Dataset<NestedSmallBean> ds = df.as(Encoders.bean(NestedSmallBean.class));
+
+ ds.collect();
+ }
+ }
}