diff options
author | Michael Armbrust <michael@databricks.com> | 2014-09-27 12:10:16 -0700 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2014-09-27 12:10:16 -0700 |
commit | f0c7e19550d46f81a0a3ff272bbf66ce4bafead6 (patch) | |
tree | 64681bf1e0005e9c0090490eab4a1d91799acc03 | |
parent | 0800881051df8029afb22a4ec17970e316a85855 (diff) | |
download | spark-f0c7e19550d46f81a0a3ff272bbf66ce4bafead6.tar.gz spark-f0c7e19550d46f81a0a3ff272bbf66ce4bafead6.tar.bz2 spark-f0c7e19550d46f81a0a3ff272bbf66ce4bafead6.zip |
[SPARK-3680][SQL] Fix bug caused by eager typing of HiveGenericUDFs
Typing of UDFs should be lazy as it is often not valid to call `dataType` on an expression until after all of its children are `resolved`.
Author: Michael Armbrust <michael@databricks.com>
Closes #2525 from marmbrus/concatBug and squashes the following commits:
5b8efe7 [Michael Armbrust] fix bug with eager typing of udfs
-rw-r--r-- | sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala | 2 | ||||
-rw-r--r-- | sql/hive/src/test/scala/org/apache/spark/sql/parquet/ParquetMetastoreSuite.scala | 15 |
2 files changed, 12 insertions, 5 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
index 68944ed4ef..732e4976f6 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
@@ -151,7 +151,7 @@ private[hive] case class HiveGenericUdf(functionClassName: String, children: Seq
     override def get(): AnyRef = wrap(func())
   }
 
-  val dataType: DataType = inspectorToDataType(returnInspector)
+  lazy val dataType: DataType = inspectorToDataType(returnInspector)
 
   override def eval(input: Row): Any = {
     returnInspector // Make sure initialized.
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/parquet/ParquetMetastoreSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/parquet/ParquetMetastoreSuite.scala
index e380280f30..86adbbf3ad 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/parquet/ParquetMetastoreSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/parquet/ParquetMetastoreSuite.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.parquet
 
 import java.io.File
 
+import org.apache.spark.sql.catalyst.expressions.Row
 import org.scalatest.BeforeAndAfterAll
 
 import org.apache.spark.sql.QueryTest
@@ -142,15 +143,21 @@ class ParquetMetastoreSuite extends QueryTest with BeforeAndAfterAll {
   test("sum") {
     checkAnswer(
       sql("SELECT SUM(intField) FROM partitioned_parquet WHERE intField IN (1,2,3) AND p = 1"),
-      1 + 2 + 3
-    )
+      1 + 2 + 3)
+  }
+
+  test("hive udfs") {
+    checkAnswer(
+      sql("SELECT concat(stringField, stringField) FROM partitioned_parquet"),
+      sql("SELECT stringField FROM partitioned_parquet").map {
+        case Row(s: String) => Row(s + s)
+      }.collect().toSeq)
   }
 
   test("non-part select(*)") {
     checkAnswer(
       sql("SELECT COUNT(*) FROM normal_parquet"),
-      10
-    )
+      10)
   }
 
   test("conversion is working") {