aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorMichael Armbrust <michael@databricks.com>2014-09-27 12:10:16 -0700
committerMichael Armbrust <michael@databricks.com>2014-09-27 12:10:16 -0700
commitf0c7e19550d46f81a0a3ff272bbf66ce4bafead6 (patch)
tree64681bf1e0005e9c0090490eab4a1d91799acc03 /sql
parent0800881051df8029afb22a4ec17970e316a85855 (diff)
downloadspark-f0c7e19550d46f81a0a3ff272bbf66ce4bafead6.tar.gz
spark-f0c7e19550d46f81a0a3ff272bbf66ce4bafead6.tar.bz2
spark-f0c7e19550d46f81a0a3ff272bbf66ce4bafead6.zip
[SPARK-3680][SQL] Fix bug caused by eager typing of HiveGenericUDFs
Typing of UDFs should be lazy as it is often not valid to call `dataType` on an expression until after all of its children are `resolved`. Author: Michael Armbrust <michael@databricks.com> Closes #2525 from marmbrus/concatBug and squashes the following commits: 5b8efe7 [Michael Armbrust] fix bug with eager typing of udfs
Diffstat (limited to 'sql')
-rw-r--r-- sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala | 2
-rw-r--r-- sql/hive/src/test/scala/org/apache/spark/sql/parquet/ParquetMetastoreSuite.scala | 15
2 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
index 68944ed4ef..732e4976f6 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
@@ -151,7 +151,7 @@ private[hive] case class HiveGenericUdf(functionClassName: String, children: Seq
override def get(): AnyRef = wrap(func())
}
- val dataType: DataType = inspectorToDataType(returnInspector)
+ lazy val dataType: DataType = inspectorToDataType(returnInspector)
override def eval(input: Row): Any = {
returnInspector // Make sure initialized.
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/parquet/ParquetMetastoreSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/parquet/ParquetMetastoreSuite.scala
index e380280f30..86adbbf3ad 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/parquet/ParquetMetastoreSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/parquet/ParquetMetastoreSuite.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.parquet
import java.io.File
+import org.apache.spark.sql.catalyst.expressions.Row
import org.scalatest.BeforeAndAfterAll
import org.apache.spark.sql.QueryTest
@@ -142,15 +143,21 @@ class ParquetMetastoreSuite extends QueryTest with BeforeAndAfterAll {
test("sum") {
checkAnswer(
sql("SELECT SUM(intField) FROM partitioned_parquet WHERE intField IN (1,2,3) AND p = 1"),
- 1 + 2 + 3
- )
+ 1 + 2 + 3)
+ }
+
+ test("hive udfs") {
+ checkAnswer(
+ sql("SELECT concat(stringField, stringField) FROM partitioned_parquet"),
+ sql("SELECT stringField FROM partitioned_parquet").map {
+ case Row(s: String) => Row(s + s)
+ }.collect().toSeq)
}
test("non-part select(*)") {
checkAnswer(
sql("SELECT COUNT(*) FROM normal_parquet"),
- 10
- )
+ 10)
}
test("conversion is working") {