aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorMichael Armbrust <michael@databricks.com>2014-09-27 12:10:16 -0700
committerMichael Armbrust <michael@databricks.com>2014-09-27 12:10:16 -0700
commitf0c7e19550d46f81a0a3ff272bbf66ce4bafead6 (patch)
tree64681bf1e0005e9c0090490eab4a1d91799acc03 /sql
parent0800881051df8029afb22a4ec17970e316a85855 (diff)
downloadspark-f0c7e19550d46f81a0a3ff272bbf66ce4bafead6.tar.gz
spark-f0c7e19550d46f81a0a3ff272bbf66ce4bafead6.tar.bz2
spark-f0c7e19550d46f81a0a3ff272bbf66ce4bafead6.zip
[SPARK-3680][SQL] Fix bug caused by eager typing of HiveGenericUDFs
Typing of UDFs should be lazy as it is often not valid to call `dataType` on an expression until after all of its children are `resolved`. Author: Michael Armbrust <michael@databricks.com> Closes #2525 from marmbrus/concatBug and squashes the following commits: 5b8efe7 [Michael Armbrust] fix bug with eager typing of udfs
Diffstat (limited to 'sql')
-rw-r--r-- sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala | 2
-rw-r--r-- sql/hive/src/test/scala/org/apache/spark/sql/parquet/ParquetMetastoreSuite.scala | 15
2 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
index 68944ed4ef..732e4976f6 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
@@ -151,7 +151,7 @@ private[hive] case class HiveGenericUdf(functionClassName: String, children: Seq
override def get(): AnyRef = wrap(func())
}
- val dataType: DataType = inspectorToDataType(returnInspector)
+ lazy val dataType: DataType = inspectorToDataType(returnInspector)
override def eval(input: Row): Any = {
returnInspector // Make sure initialized.
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/parquet/ParquetMetastoreSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/parquet/ParquetMetastoreSuite.scala
index e380280f30..86adbbf3ad 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/parquet/ParquetMetastoreSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/parquet/ParquetMetastoreSuite.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.parquet
import java.io.File
+import org.apache.spark.sql.catalyst.expressions.Row
import org.scalatest.BeforeAndAfterAll
import org.apache.spark.sql.QueryTest
@@ -142,15 +143,21 @@ class ParquetMetastoreSuite extends QueryTest with BeforeAndAfterAll {
test("sum") {
checkAnswer(
sql("SELECT SUM(intField) FROM partitioned_parquet WHERE intField IN (1,2,3) AND p = 1"),
- 1 + 2 + 3
- )
+ 1 + 2 + 3)
+ }
+
+ test("hive udfs") {
+ checkAnswer(
+ sql("SELECT concat(stringField, stringField) FROM partitioned_parquet"),
+ sql("SELECT stringField FROM partitioned_parquet").map {
+ case Row(s: String) => Row(s + s)
+ }.collect().toSeq)
}
test("non-part select(*)") {
checkAnswer(
sql("SELECT COUNT(*) FROM normal_parquet"),
- 10
- )
+ 10)
}
test("conversion is working") {