From a5f9fdbba3bbefb56ca9ab33271301a2ff0834b5 Mon Sep 17 00:00:00 2001
From: Liang-Chi Hsieh
Date: Wed, 11 May 2016 09:31:22 -0700
Subject: [SPARK-15268][SQL] Make JavaTypeInference work with UDTRegistration

## What changes were proposed in this pull request?

We have a private `UDTRegistration` API for registering user-defined types. Currently `JavaTypeInference` does not consult it, so `SparkSession.createDataFrame` from a bean class will not correctly infer the schema of the bean class.

## How was this patch tested?

A new test in `VectorUDTSuite`.

Author: Liang-Chi Hsieh

Closes #13046 from viirya/fix-udt-registry-javatypeinference.
---
 .../org/apache/spark/ml/linalg/VectorUDTSuite.scala   | 16 ++++++++++++++++
 .../apache/spark/sql/catalyst/JavaTypeInference.scala |  5 +++++
 2 files changed, 21 insertions(+)

diff --git a/mllib/src/test/scala/org/apache/spark/ml/linalg/VectorUDTSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/linalg/VectorUDTSuite.scala
index 6d01d8f282..7b50876d33 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/linalg/VectorUDTSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/linalg/VectorUDTSuite.scala
@@ -17,9 +17,19 @@
 
 package org.apache.spark.ml.linalg
 
+import scala.beans.BeanInfo
+
 import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.catalyst.JavaTypeInference
 import org.apache.spark.sql.types._
 
+@BeanInfo
+case class LabeledPoint(label: Double, features: Vector) {
+  override def toString: String = {
+    s"($label,$features)"
+  }
+}
+
 class VectorUDTSuite extends SparkFunSuite {
 
   test("preloaded VectorUDT") {
@@ -36,4 +46,10 @@ class VectorUDTSuite extends SparkFunSuite {
       assert(udt.simpleString == "vector")
     }
   }
+
+  test("JavaTypeInference with VectorUDT") {
+    val (dataType, _) = JavaTypeInference.inferDataType(classOf[LabeledPoint])
+    assert(dataType.asInstanceOf[StructType].fields.map(_.dataType)
+      === Seq(new VectorUDT, DoubleType))
+  }
 }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
index 6f9fbbbead..92caf8f406 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
@@ -63,6 +63,11 @@ object JavaTypeInference {
       case c: Class[_] if c.isAnnotationPresent(classOf[SQLUserDefinedType]) =>
         (c.getAnnotation(classOf[SQLUserDefinedType]).udt().newInstance(), true)
 
+      case c: Class[_] if UDTRegistration.exists(c.getName) =>
+        val udt = UDTRegistration.getUDTFor(c.getName).get.newInstance()
+          .asInstanceOf[UserDefinedType[_ >: Null]]
+        (udt, true)
+
       case c: Class[_] if c == classOf[java.lang.String] => (StringType, true)
 
       case c: Class[_] if c == classOf[Array[Byte]] => (BinaryType, true)
-- 
cgit v1.2.3
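
For illustration, a minimal sketch of the user-facing effect described in the PR description; it is not part of the commit. It assumes a live `SparkSession` named `spark`, the `LabeledPoint` bean defined in `VectorUDTSuite` above, and the Spark 2.0-era `createDataFrame(java.util.List, Class)` overload:

```scala
import scala.collection.JavaConverters._

import org.apache.spark.ml.linalg.Vectors

// With this patch, JavaTypeInference consults UDTRegistration, so the bean's
// Vector-typed property maps to the preregistered VectorUDT instead of being
// inferred incorrectly.
val beans = Seq(LabeledPoint(1.0, Vectors.dense(0.1, 0.2))).asJava
val df = spark.createDataFrame(beans, classOf[LabeledPoint])
df.printSchema()
// Expected column types, matching the assertion in the new test:
// features as vector (VectorUDT), label as double.
```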