about summary refs log tree commit diff
path: root/sql/catalyst
diff options
context:
space:
mode:
authorTakeshi Yamamuro <yamamuro@apache.org>2017-03-16 08:50:01 +0800
committerWenchen Fan <wenchen@databricks.com>2017-03-16 08:50:01 +0800
commit21f333c635465069b7657d788052d510ffb0779a (patch)
treed574b69f428ba5586d88475592f0e7775955dc32 /sql/catalyst
parentfc9314671c8a082ae339fd6df177a2b684c65d40 (diff)
downloadspark-21f333c635465069b7657d788052d510ffb0779a.tar.gz
spark-21f333c635465069b7657d788052d510ffb0779a.tar.bz2
spark-21f333c635465069b7657d788052d510ffb0779a.zip
[SPARK-19751][SQL] Throw an exception if bean class has one's own class in fields
## What changes were proposed in this pull request?

The current master throws `StackOverflowError` in `createDataFrame`/`createDataset` if bean has one's own class in fields;
```
public class SelfClassInFieldBean implements Serializable {
  private SelfClassInFieldBean child;
  ...
}
```
This pr added code to throw `UnsupportedOperationException` in that case as soon as possible.

## How was this patch tested?

Added tests in `JavaDataFrameSuite` and `JavaDatasetSuite`.

Author: Takeshi Yamamuro <yamamuro@apache.org>

Closes #17188 from maropu/SPARK-19751.
Diffstat (limited to 'sql/catalyst')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala19
1 file changed, 13 insertions, 6 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
index e9d9508e5a..4ff87edde1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
@@ -69,7 +69,8 @@ object JavaTypeInference {
* @param typeToken Java type
* @return (SQL data type, nullable)
*/
- private def inferDataType(typeToken: TypeToken[_]): (DataType, Boolean) = {
+ private def inferDataType(typeToken: TypeToken[_], seenTypeSet: Set[Class[_]] = Set.empty)
+ : (DataType, Boolean) = {
typeToken.getRawType match {
case c: Class[_] if c.isAnnotationPresent(classOf[SQLUserDefinedType]) =>
(c.getAnnotation(classOf[SQLUserDefinedType]).udt().newInstance(), true)
@@ -104,26 +105,32 @@ object JavaTypeInference {
case c: Class[_] if c == classOf[java.sql.Timestamp] => (TimestampType, true)
case _ if typeToken.isArray =>
- val (dataType, nullable) = inferDataType(typeToken.getComponentType)
+ val (dataType, nullable) = inferDataType(typeToken.getComponentType, seenTypeSet)
(ArrayType(dataType, nullable), true)
case _ if iterableType.isAssignableFrom(typeToken) =>
- val (dataType, nullable) = inferDataType(elementType(typeToken))
+ val (dataType, nullable) = inferDataType(elementType(typeToken), seenTypeSet)
(ArrayType(dataType, nullable), true)
case _ if mapType.isAssignableFrom(typeToken) =>
val (keyType, valueType) = mapKeyValueType(typeToken)
- val (keyDataType, _) = inferDataType(keyType)
- val (valueDataType, nullable) = inferDataType(valueType)
+ val (keyDataType, _) = inferDataType(keyType, seenTypeSet)
+ val (valueDataType, nullable) = inferDataType(valueType, seenTypeSet)
(MapType(keyDataType, valueDataType, nullable), true)
case other =>
+ if (seenTypeSet.contains(other)) {
+ throw new UnsupportedOperationException(
+ "Cannot have circular references in bean class, but got the circular reference " +
+ s"of class $other")
+ }
+
// TODO: we should only collect properties that have getter and setter. However, some tests
// pass in scala case class as java bean class which doesn't have getter and setter.
val properties = getJavaBeanReadableProperties(other)
val fields = properties.map { property =>
val returnType = typeToken.method(property.getReadMethod).getReturnType
- val (dataType, nullable) = inferDataType(returnType)
+ val (dataType, nullable) = inferDataType(returnType, seenTypeSet + other)
new StructField(property.getName, dataType, nullable)
}
(new StructType(fields), true)