diff options
author | Cheng Hao <hao.cheng@intel.com> | 2015-07-26 18:34:19 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2015-07-26 18:34:19 -0700 |
commit | 1efe97dc9ed31e3b8727b81be633b7e96dd3cd34 (patch) | |
tree | 39e1093bc6ae17efce96ca21502d40ab69e8960c /sql/hive | |
parent | c025c3d0a1fdfbc45b64db9c871176b40b4a7b9b (diff) | |
download | spark-1efe97dc9ed31e3b8727b81be633b7e96dd3cd34.tar.gz spark-1efe97dc9ed31e3b8727b81be633b7e96dd3cd34.tar.bz2 spark-1efe97dc9ed31e3b8727b81be633b7e96dd3cd34.zip |
[SPARK-8867][SQL] Support list / describe function usage
As Hive does, we need to list all of the registered UDFs and their usage for the user.
We add an annotation to describe a UDF, so that we can get its literal description info when the UDF is registered.
e.g.
```scala
@ExpressionDescription(
usage = "_FUNC_(expr) - Returns the absolute value of the numeric value",
extended = """> SELECT _FUNC_('-1')
1""")
case class Abs(child: Expression) extends UnaryArithmetic {
...
```
Author: Cheng Hao <hao.cheng@intel.com>
Closes #7259 from chenghao-intel/desc_function and squashes the following commits:
cf29bba [Cheng Hao] fixing the code style issue
5193855 [Cheng Hao] Add more powerful parser for show functions
c645a6b [Cheng Hao] fix bug in unit test
78d40f1 [Cheng Hao] update the padding issue for usage
48ee4b3 [Cheng Hao] update as feedback
70eb4e9 [Cheng Hao] add show/describe function support
Diffstat (limited to 'sql/hive')
3 files changed, 78 insertions, 4 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala index 54bf6bd67f..8732e9abf8 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala @@ -76,8 +76,32 @@ private[hive] class HiveFunctionRegistry(underlying: analysis.FunctionRegistry) } } - override def registerFunction(name: String, builder: FunctionBuilder): Unit = - underlying.registerFunction(name, builder) + override def registerFunction(name: String, info: ExpressionInfo, builder: FunctionBuilder) + : Unit = underlying.registerFunction(name, info, builder) + + /* List all of the registered function names. */ + override def listFunction(): Seq[String] = { + val a = FunctionRegistry.getFunctionNames ++ underlying.listFunction() + a.toList.sorted + } + + /* Get the class of the registered function by specified name. */ + override def lookupFunction(name: String): Option[ExpressionInfo] = { + underlying.lookupFunction(name).orElse( + Try { + val info = FunctionRegistry.getFunctionInfo(name) + val annotation = info.getFunctionClass.getAnnotation(classOf[Description]) + if (annotation != null) { + Some(new ExpressionInfo( + info.getFunctionClass.getCanonicalName, + annotation.name(), + annotation.value(), + annotation.extended())) + } else { + None + } + }.getOrElse(None)) + } } private[hive] case class HiveSimpleUDF(funcWrapper: HiveFunctionWrapper, children: Seq[Expression]) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala index efb04bf3d5..638b9c8103 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala @@ -370,7 +370,11 @@ abstract class HiveComparisonTest // 
Check that the results match unless its an EXPLAIN query. val preparedHive = prepareAnswer(hiveQuery, hive) - if ((!hiveQuery.logical.isInstanceOf[ExplainCommand]) && preparedHive != catalyst) { + // We will ignore the ExplainCommand, ShowFunctions, DescribeFunction + if ((!hiveQuery.logical.isInstanceOf[ExplainCommand]) && + (!hiveQuery.logical.isInstanceOf[ShowFunctions]) && + (!hiveQuery.logical.isInstanceOf[DescribeFunction]) && + preparedHive != catalyst) { val hivePrintOut = s"== HIVE - ${preparedHive.size} row(s) ==" +: preparedHive val catalystPrintOut = s"== CATALYST - ${catalyst.size} row(s) ==" +: catalyst diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index ff42fdefaa..013936377b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -19,9 +19,11 @@ package org.apache.spark.sql.hive.execution import java.sql.{Date, Timestamp} +import scala.collection.JavaConversions._ + import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.DefaultParserDialect -import org.apache.spark.sql.catalyst.analysis.EliminateSubQueries +import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, EliminateSubQueries} import org.apache.spark.sql.catalyst.errors.DialectException import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.hive.test.TestHive @@ -138,6 +140,50 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils { (1 to 6).map(_ => Row("CA", 20151))) } + test("show functions") { + val allFunctions = + (FunctionRegistry.builtin.listFunction().toSet[String] ++ + org.apache.hadoop.hive.ql.exec.FunctionRegistry.getFunctionNames).toList.sorted + checkAnswer(sql("SHOW functions"), allFunctions.map(Row(_))) + checkAnswer(sql("SHOW functions abs"), Row("abs")) + 
checkAnswer(sql("SHOW functions 'abs'"), Row("abs")) + checkAnswer(sql("SHOW functions abc.abs"), Row("abs")) + checkAnswer(sql("SHOW functions `abc`.`abs`"), Row("abs")) + checkAnswer(sql("SHOW functions `abc`.`abs`"), Row("abs")) + checkAnswer(sql("SHOW functions `~`"), Row("~")) + checkAnswer(sql("SHOW functions `a function doens't exist`"), Nil) + checkAnswer(sql("SHOW functions `weekofyea.*`"), Row("weekofyear")) + // this probably will failed if we add more function with `sha` prefixing. + checkAnswer(sql("SHOW functions `sha.*`"), Row("sha") :: Row("sha1") :: Row("sha2") :: Nil) + } + + test("describe functions") { + // The Spark SQL built-in functions + checkExistence(sql("describe function extended upper"), true, + "Function: upper", + "Class: org.apache.spark.sql.catalyst.expressions.Upper", + "Usage: upper(str) - Returns str with all characters changed to uppercase", + "Extended Usage:", + "> SELECT upper('SparkSql')", + "'SPARKSQL'") + + checkExistence(sql("describe functioN Upper"), true, + "Function: upper", + "Class: org.apache.spark.sql.catalyst.expressions.Upper", + "Usage: upper(str) - Returns str with all characters changed to uppercase") + + checkExistence(sql("describe functioN Upper"), false, + "Extended Usage") + + checkExistence(sql("describe functioN abcadf"), true, + "Function: abcadf is not found.") + + checkExistence(sql("describe functioN `~`"), true, + "Function: ~", + "Class: org.apache.hadoop.hive.ql.udf.UDFOPBitNot", + "Usage: ~ n - Bitwise not") + } + test("SPARK-5371: union with null and sum") { val df = Seq((1, 1)).toDF("c1", "c2") df.registerTempTable("table1") |