author     hyukjinkwon <gurwls223@gmail.com>     2017-02-22 12:42:23 -0800
committer  Wenchen Fan <wenchen@databricks.com>  2017-02-22 12:42:23 -0800
commit     37112fcfcd64db8f84f437e5c54cc3ea039c68f6 (patch)
tree       10daabc020ac169fa7fdd827aa38c0d2c49ea396 /sql/catalyst
parent     1f86e795b87ba93640062f29e87a032924d94b2a (diff)
[SPARK-19666][SQL] Skip a property without getter in Java schema inference and allow empty bean in encoder creation
## What changes were proposed in this pull request?

This PR proposes two fixes.

**Skip a property without a getter in beans**

Currently, if we use a JavaBean without a getter as below:

```java
public static class BeanWithoutGetter implements Serializable {
  private String a;

  public void setA(String a) {
    this.a = a;
  }
}

BeanWithoutGetter bean = new BeanWithoutGetter();
List<BeanWithoutGetter> data = Arrays.asList(bean);
spark.createDataFrame(data, BeanWithoutGetter.class).show();
```

- Before: it throws an exception as below:

```
java.lang.NullPointerException
  at org.spark_project.guava.reflect.TypeToken.method(TypeToken.java:465)
  at org.apache.spark.sql.catalyst.JavaTypeInference$$anonfun$2.apply(JavaTypeInference.scala:126)
  at org.apache.spark.sql.catalyst.JavaTypeInference$$anonfun$2.apply(JavaTypeInference.scala:125)
```

- After: it prints an empty DataFrame (no columns, one row):

```
++
||
++
||
++
```

**Support an empty bean in encoder creation**

```java
public static class EmptyBean implements Serializable {}

EmptyBean bean = new EmptyBean();
List<EmptyBean> data = Arrays.asList(bean);
spark.createDataset(data, Encoders.bean(EmptyBean.class)).show();
```

- Before: it throws an exception as below:

```
java.lang.UnsupportedOperationException: Cannot infer type for class EmptyBean because it is not bean-compliant
  at org.apache.spark.sql.catalyst.JavaTypeInference$.org$apache$spark$sql$catalyst$JavaTypeInference$$serializerFor(JavaTypeInference.scala:436)
  at org.apache.spark.sql.catalyst.JavaTypeInference$.serializerFor(JavaTypeInference.scala:341)
```

- After: it prints an empty Dataset (no columns, one row):

```
++
||
++
||
++
```

## How was this patch tested?

Unit tests in `JavaDataFrameSuite`.

Author: hyukjinkwon <gurwls223@gmail.com>

Closes #17013 from HyukjinKwon/SPARK-19666.
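For reference, the property filtering the patch introduces can be sketched with plain `java.beans` introspection, independent of Spark. This is a minimal Scala sketch mirroring the new `getJavaBeanReadableProperties` / `getJavaBeanReadableAndWritableProperties` helpers in the diff below; the standalone object, method, and bean names here are illustrative, not Spark API:

```scala
import java.beans.{Introspector, PropertyDescriptor}

object BeanIntrospectionSketch {
  // Hypothetical bean matching the PR description: a setter but no getter.
  class BeanWithoutGetter extends Serializable {
    private var a: String = _
    def setA(a: String): Unit = { this.a = a }
  }

  // Mirrors getJavaBeanReadableProperties: drop the synthetic "class"
  // property (from getClass), keep any property that has a getter.
  def readableProperties(beanClass: Class[_]): Array[PropertyDescriptor] =
    Introspector.getBeanInfo(beanClass).getPropertyDescriptors
      .filterNot(_.getName == "class")
      .filter(_.getReadMethod != null)

  // Mirrors getJavaBeanReadableAndWritableProperties: additionally
  // require a setter, as the deserialization path must write fields back.
  def readableAndWritableProperties(beanClass: Class[_]): Array[PropertyDescriptor] =
    readableProperties(beanClass).filter(_.getWriteMethod != null)

  def main(args: Array[String]): Unit = {
    // The getter-less property is skipped instead of being dereferenced;
    // the old code passed its null read method along and hit the
    // NullPointerException shown above.
    assert(readableProperties(classOf[BeanWithoutGetter]).isEmpty)
    println("getter-less property skipped as expected")
  }
}
```

The split reflects the two call sites: schema inference and serialization only need getters, while the deserializer must also set fields, hence the stricter readable-and-writable filter there.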
Diffstat (limited to 'sql/catalyst')
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala | 45
1 file changed, 21 insertions(+), 24 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
index 8b53d988cb..e9d9508e5a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
@@ -117,11 +117,10 @@ object JavaTypeInference {
val (valueDataType, nullable) = inferDataType(valueType)
(MapType(keyDataType, valueDataType, nullable), true)
- case _ =>
+ case other =>
// TODO: we should only collect properties that have getter and setter. However, some tests
// pass in scala case class as java bean class which doesn't have getter and setter.
- val beanInfo = Introspector.getBeanInfo(typeToken.getRawType)
- val properties = beanInfo.getPropertyDescriptors.filterNot(_.getName == "class")
+ val properties = getJavaBeanReadableProperties(other)
val fields = properties.map { property =>
val returnType = typeToken.method(property.getReadMethod).getReturnType
val (dataType, nullable) = inferDataType(returnType)
@@ -131,10 +130,15 @@ object JavaTypeInference {
}
}
- private def getJavaBeanProperties(beanClass: Class[_]): Array[PropertyDescriptor] = {
+ def getJavaBeanReadableProperties(beanClass: Class[_]): Array[PropertyDescriptor] = {
val beanInfo = Introspector.getBeanInfo(beanClass)
- beanInfo.getPropertyDescriptors
- .filter(p => p.getReadMethod != null && p.getWriteMethod != null)
+ beanInfo.getPropertyDescriptors.filterNot(_.getName == "class")
+ .filter(_.getReadMethod != null)
+ }
+
+ private def getJavaBeanReadableAndWritableProperties(
+ beanClass: Class[_]): Array[PropertyDescriptor] = {
+ getJavaBeanReadableProperties(beanClass).filter(_.getWriteMethod != null)
}
private def elementType(typeToken: TypeToken[_]): TypeToken[_] = {
@@ -298,9 +302,7 @@ object JavaTypeInference {
keyData :: valueData :: Nil)
case other =>
- val properties = getJavaBeanProperties(other)
- assert(properties.length > 0)
-
+ val properties = getJavaBeanReadableAndWritableProperties(other)
val setters = properties.map { p =>
val fieldName = p.getName
val fieldType = typeToken.method(p.getReadMethod).getReturnType
@@ -417,21 +419,16 @@ object JavaTypeInference {
)
case other =>
- val properties = getJavaBeanProperties(other)
- if (properties.length > 0) {
- CreateNamedStruct(properties.flatMap { p =>
- val fieldName = p.getName
- val fieldType = typeToken.method(p.getReadMethod).getReturnType
- val fieldValue = Invoke(
- inputObject,
- p.getReadMethod.getName,
- inferExternalType(fieldType.getRawType))
- expressions.Literal(fieldName) :: serializerFor(fieldValue, fieldType) :: Nil
- })
- } else {
- throw new UnsupportedOperationException(
- s"Cannot infer type for class ${other.getName} because it is not bean-compliant")
- }
+ val properties = getJavaBeanReadableAndWritableProperties(other)
+ CreateNamedStruct(properties.flatMap { p =>
+ val fieldName = p.getName
+ val fieldType = typeToken.method(p.getReadMethod).getReturnType
+ val fieldValue = Invoke(
+ inputObject,
+ p.getReadMethod.getName,
+ inferExternalType(fieldType.getRawType))
+ expressions.Literal(fieldName) :: serializerFor(fieldValue, fieldType) :: Nil
+ })
}
}
}
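For completeness, a Scala counterpart of the two examples in the PR description, as a hypothetical sketch assuming a local Spark build that includes this patch (the object, app, and bean names are illustrative):

```scala
import java.util.Arrays

import org.apache.spark.sql.{Encoders, SparkSession}

// Scala counterparts of the beans in the PR description.
class BeanWithoutGetter extends Serializable {
  private var a: String = _
  def setA(a: String): Unit = { this.a = a }
}
class EmptyBean extends Serializable

object Spark19666Example {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[1]")
      .appName("SPARK-19666")
      .getOrCreate()

    // Schema inference now skips the getter-less property instead of
    // hitting a NullPointerException; prints the empty-table output
    // shown in the description.
    spark.createDataFrame(Arrays.asList(new BeanWithoutGetter), classOf[BeanWithoutGetter]).show()

    // Encoder creation now accepts a bean with no properties at all.
    spark.createDataset(Arrays.asList(new EmptyBean))(Encoders.bean(classOf[EmptyBean])).show()

    spark.stop()
  }
}
```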