author     Yong Tang <yong.tang.github@outlook.com>  2016-04-09 13:54:30 -0700
committer  Yin Huai <yhuai@databricks.com>  2016-04-09 13:54:30 -0700
commit     cd2fed70129ba601f8c849a93eeb44a5d69c2402 (patch)
tree       bc96c68ad2c4615ac0de174e28e2db3cafa7bc31
parent     f7ec854f1b7f575c4c7437daf8e6992c684b6de2 (diff)
[SPARK-14335][SQL] Describe function command returns wrong output
## What changes were proposed in this pull request?

This fix addresses issues in the `describe function` command where some of the outputs still show Hive's functions, because some built-in functions are not in FunctionRegistry.

The following built-in functions have been added to FunctionRegistry:

```
- ! * / & % ^ + < <= <=> = == > >= | ~ and in like not or rlike when
```

The following functions are not added to the registry but are hard-coded in `commands.scala` (hvanhovell):

```
!= <> between case
```

Below is the existing output for the functions above that have not been added:

```
spark-sql> describe function `!=`;
Function: <>
Class: org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual
Usage: a <> b - Returns TRUE if a is not equal to b
```

```
spark-sql> describe function `<>`;
Function: <>
Class: org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual
Usage: a <> b - Returns TRUE if a is not equal to b
```

```
spark-sql> describe function `between`;
Function: between
Class: org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween
Usage: between a [NOT] BETWEEN b AND c - evaluate if a is [not] in between b and c
```

```
spark-sql> describe function `case`;
Function: case
Class: org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase
Usage: CASE a WHEN b THEN c [WHEN d THEN e]* [ELSE f] END - When a = b, returns c; when a = d, return e; else return f
```

## How was this patch tested?

Existing tests passed. Additional test cases were added.

Author: Yong Tang <yong.tang.github@outlook.com>

Closes #12128 from yongtang/SPARK-14335.
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala  33
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala  44
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala  30
3 files changed, 86 insertions, 21 deletions
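
After this change, `describe function` on an operator such as `~` resolves through Spark's own FunctionRegistry rather than Hive. A hedged illustration of the expected shell output, matching the expectations in the updated SQLQuerySuite below (the `Usage` text is still a placeholder at this point):

```
spark-sql> describe function `~`;
Function: ~
Class: org.apache.spark.sql.catalyst.expressions.BitwiseNot
Usage: To be added.
```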
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index f239b33e44..f2abf136da 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -171,6 +171,7 @@ object FunctionRegistry {
expression[Rand]("rand"),
expression[Randn]("randn"),
expression[CreateStruct]("struct"),
+ expression[CaseWhen]("when"),
// math functions
expression[Acos]("acos"),
@@ -217,6 +218,12 @@ object FunctionRegistry {
expression[Tan]("tan"),
expression[Tanh]("tanh"),
+ expression[Add]("+"),
+ expression[Subtract]("-"),
+ expression[Multiply]("*"),
+ expression[Divide]("/"),
+ expression[Remainder]("%"),
+
// aggregate functions
expression[HyperLogLogPlusPlus]("approx_count_distinct"),
expression[Average]("avg"),
@@ -257,6 +264,7 @@ object FunctionRegistry {
expression[Lower]("lcase"),
expression[Length]("length"),
expression[Levenshtein]("levenshtein"),
+ expression[Like]("like"),
expression[Lower]("lower"),
expression[StringLocate]("locate"),
expression[StringLPad]("lpad"),
@@ -267,6 +275,7 @@ object FunctionRegistry {
expression[RegExpReplace]("regexp_replace"),
expression[StringRepeat]("repeat"),
expression[StringReverse]("reverse"),
+ expression[RLike]("rlike"),
expression[StringRPad]("rpad"),
expression[StringTrimRight]("rtrim"),
expression[SoundEx]("soundex"),
@@ -343,7 +352,29 @@ object FunctionRegistry {
expression[NTile]("ntile"),
expression[Rank]("rank"),
expression[DenseRank]("dense_rank"),
- expression[PercentRank]("percent_rank")
+ expression[PercentRank]("percent_rank"),
+
+ // predicates
+ expression[And]("and"),
+ expression[In]("in"),
+ expression[Not]("not"),
+ expression[Or]("or"),
+
+ expression[EqualNullSafe]("<=>"),
+ expression[EqualTo]("="),
+ expression[EqualTo]("=="),
+ expression[GreaterThan](">"),
+ expression[GreaterThanOrEqual](">="),
+ expression[LessThan]("<"),
+ expression[LessThanOrEqual]("<="),
+ expression[Not]("!"),
+
+ // bitwise
+ expression[BitwiseAnd]("&"),
+ expression[BitwiseNot]("~"),
+ expression[BitwiseOr]("|"),
+ expression[BitwiseXor]("^")
+
)
val builtin: SimpleFunctionRegistry = {
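
To make the registration pattern above concrete, here is a minimal, self-contained Scala sketch of a name-to-builder registry in the same spirit. This is not Spark's implementation: the real `expression[T](name)` helper uses a `ClassTag` and constructor reflection to derive both the builder and its `ExpressionInfo`, and the simplified `Expression` hierarchy and `RegistrySketch` object below are illustrative assumptions only.

```scala
object RegistrySketch {
  // Simplified stand-ins for Catalyst's expression tree.
  sealed trait Expression
  case class Literal(value: Any) extends Expression
  case class Add(left: Expression, right: Expression) extends Expression
  case class BitwiseNot(child: Expression) extends Expression

  // A builder turns the parsed argument list into a concrete expression node.
  type FunctionBuilder = Seq[Expression] => Expression

  // Name -> builder map, mirroring how "+", "~", etc. are now looked up by
  // name. A call with the wrong arity throws a MatchError; fine for a sketch.
  val builtin: Map[String, FunctionBuilder] = Map(
    "+" -> { case Seq(l, r) => Add(l, r) },
    "~" -> { case Seq(c) => BitwiseNot(c) }
  )

  def lookupFunction(name: String, args: Seq[Expression]): Expression =
    builtin.getOrElse(
      name.toLowerCase,
      throw new NoSuchElementException(s"Function: $name not found.")
    )(args)

  def main(args: Array[String]): Unit = {
    println(lookupFunction("+", Seq(Literal(1), Literal(2)))) // Add(Literal(1),Literal(2))
    println(lookupFunction("~", Seq(Literal(5))))             // BitwiseNot(Literal(5))
  }
}
```

Registering the operators by their symbolic names is what lets `SHOW FUNCTIONS` and `DESCRIBE FUNCTION` find them without ever consulting Hive's UDF catalog.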
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala
index 3fd2a93d29..5d00c805a6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala
@@ -483,20 +483,38 @@ case class DescribeFunction(
}
override def run(sqlContext: SQLContext): Seq[Row] = {
- sqlContext.sessionState.functionRegistry.lookupFunction(functionName) match {
- case Some(info) =>
- val result =
- Row(s"Function: ${info.getName}") ::
- Row(s"Class: ${info.getClassName}") ::
- Row(s"Usage: ${replaceFunctionName(info.getUsage(), info.getName)}") :: Nil
-
- if (isExtended) {
- result :+ Row(s"Extended Usage:\n${replaceFunctionName(info.getExtended, info.getName)}")
- } else {
- result
- }
+ // Hard code "<>", "!=", "between", and "case" for now as there are no corresponding functions.
+ functionName.toLowerCase match {
+ case "<>" =>
+ Row(s"Function: $functionName") ::
+ Row(s"Usage: a <> b - Returns TRUE if a is not equal to b") :: Nil
+ case "!=" =>
+ Row(s"Function: $functionName") ::
+ Row(s"Usage: a != b - Returns TRUE if a is not equal to b") :: Nil
+ case "between" =>
+ Row(s"Function: between") ::
+ Row(s"Usage: a [NOT] BETWEEN b AND c - " +
+ s"evaluate if a is [not] in between b and c") :: Nil
+ case "case" =>
+ Row(s"Function: case") ::
+ Row(s"Usage: CASE a WHEN b THEN c [WHEN d THEN e]* [ELSE f] END - " +
+ s"When a = b, returns c; when a = d, return e; else return f") :: Nil
+ case _ => sqlContext.sessionState.functionRegistry.lookupFunction(functionName) match {
+ case Some(info) =>
+ val result =
+ Row(s"Function: ${info.getName}") ::
+ Row(s"Class: ${info.getClassName}") ::
+ Row(s"Usage: ${replaceFunctionName(info.getUsage(), info.getName)}") :: Nil
+
+ if (isExtended) {
+ result :+
+ Row(s"Extended Usage:\n${replaceFunctionName(info.getExtended, info.getName)}")
+ } else {
+ result
+ }
- case None => Seq(Row(s"Function: $functionName not found."))
+ case None => Seq(Row(s"Function: $functionName not found."))
+ }
}
}
}
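
The control flow added above can be read in isolation. Below is a minimal, runnable Scala sketch of the same dispatch; `FunctionInfo` and the `registry` map are hypothetical stand-ins for Spark's `ExpressionInfo` and `sessionState.functionRegistry`, and only the hard-coded-first, registry-second lookup order is the point.

```scala
object DescribeSketch {
  // Stand-in for org.apache.spark.sql.catalyst.expressions.ExpressionInfo.
  case class FunctionInfo(name: String, className: String, usage: String)

  def describeFunction(name: String, registry: Map[String, FunctionInfo]): Seq[String] =
    name.toLowerCase match {
      // Operators with no registry entry are answered with hard-coded text.
      case "<>" | "!=" =>
        Seq(s"Function: $name",
            s"Usage: a $name b - Returns TRUE if a is not equal to b")
      case "between" =>
        Seq("Function: between",
            "Usage: a [NOT] BETWEEN b AND c - evaluate if a is [not] in between b and c")
      case "case" =>
        Seq("Function: case",
            "Usage: CASE a WHEN b THEN c [WHEN d THEN e]* [ELSE f] END - " +
              "When a = b, returns c; when a = d, return e; else return f")
      // Everything else goes through the registry lookup.
      case other => registry.get(other) match {
        case Some(info) =>
          Seq(s"Function: ${info.name}",
              s"Class: ${info.className}",
              s"Usage: ${info.usage}")
        case None => Seq(s"Function: $name not found.")
      }
    }

  def main(args: Array[String]): Unit = {
    val registry = Map("~" -> FunctionInfo(
      "~", "org.apache.spark.sql.catalyst.expressions.BitwiseNot", "To be added."))
    describeFunction("!=", registry).foreach(println)
    describeFunction("~", registry).foreach(println)
    describeFunction("nope", registry).foreach(println)
  }
}
```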
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index 14a1d4cd30..d7ec85c15d 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -203,8 +203,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
checkAnswer(sql("SHOW functions abc.abs"), Row("abs"))
checkAnswer(sql("SHOW functions `abc`.`abs`"), Row("abs"))
checkAnswer(sql("SHOW functions `abc`.`abs`"), Row("abs"))
- // TODO: Re-enable this test after we fix SPARK-14335.
- // checkAnswer(sql("SHOW functions `~`"), Row("~"))
+ checkAnswer(sql("SHOW functions `~`"), Row("~"))
checkAnswer(sql("SHOW functions `a function doens't exist`"), Nil)
checkAnswer(sql("SHOW functions `weekofyea*`"), Row("weekofyear"))
// this will probably fail if we add more functions with the `sha` prefix.
@@ -236,11 +235,28 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
checkExistence(sql("describe functioN abcadf"), true,
"Function: abcadf not found.")
- // TODO: Re-enable this test after we fix SPARK-14335.
- // checkExistence(sql("describe functioN `~`"), true,
- // "Function: ~",
- // "Class: org.apache.hadoop.hive.ql.udf.UDFOPBitNot",
- // "Usage: ~ n - Bitwise not")
+ checkExistence(sql("describe functioN `~`"), true,
+ "Function: ~",
+ "Class: org.apache.spark.sql.catalyst.expressions.BitwiseNot",
+ "Usage: To be added.")
+
+ // Hard coded describe functions
+ checkExistence(sql("describe function `<>`"), true,
+ "Function: <>",
+ "Usage: a <> b - Returns TRUE if a is not equal to b")
+
+ checkExistence(sql("describe function `!=`"), true,
+ "Function: !=",
+ "Usage: a != b - Returns TRUE if a is not equal to b")
+
+ checkExistence(sql("describe function `between`"), true,
+ "Function: between",
+ "Usage: a [NOT] BETWEEN b AND c - evaluate if a is [not] in between b and c")
+
+ checkExistence(sql("describe function `case`"), true,
+ "Function: case",
+ "Usage: CASE a WHEN b THEN c [WHEN d THEN e]* [ELSE f] END - " +
+ "When a = b, returns c; when a = d, return e; else return f")
}
test("SPARK-5371: union with null and sum") {